summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/boot/Makefile2
-rw-r--r--arch/s390/boot/uv.c20
-rw-r--r--arch/s390/include/asm/gmap.h6
-rw-r--r--arch/s390/include/asm/kvm_host.h117
-rw-r--r--arch/s390/include/asm/mmu.h2
-rw-r--r--arch/s390/include/asm/mmu_context.h1
-rw-r--r--arch/s390/include/asm/page.h5
-rw-r--r--arch/s390/include/asm/pgtable.h35
-rw-r--r--arch/s390/include/asm/uv.h251
-rw-r--r--arch/s390/kernel/Makefile1
-rw-r--r--arch/s390/kernel/entry.h2
-rw-r--r--arch/s390/kernel/pgm_check.S4
-rw-r--r--arch/s390/kernel/setup.c9
-rw-r--r--arch/s390/kernel/uv.c414
-rw-r--r--arch/s390/kvm/Makefile2
-rw-r--r--arch/s390/kvm/diag.c6
-rw-r--r--arch/s390/kvm/gaccess.c23
-rw-r--r--arch/s390/kvm/intercept.c123
-rw-r--r--arch/s390/kvm/interrupt.c401
-rw-r--r--arch/s390/kvm/kvm-s390.c597
-rw-r--r--arch/s390/kvm/kvm-s390.h51
-rw-r--r--arch/s390/kvm/priv.c13
-rw-r--r--arch/s390/kvm/pv.c303
-rw-r--r--arch/s390/mm/fault.c78
-rw-r--r--arch/s390/mm/gmap.c72
25 files changed, 2203 insertions, 335 deletions
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 0ff9261c915e..45b33b83de08 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -37,7 +37,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
obj-y += version.o pgm_check_info.o ctype.o text_dma.o
-obj-$(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) += uv.o
+obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
targets := bzImage startup.a section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y)
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index 3f501159ee9f..8fde561f1d07 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -3,7 +3,13 @@
#include <asm/facility.h>
#include <asm/sections.h>
+/* will be used in arch/s390/kernel/uv.c */
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
+#endif
+#if IS_ENABLED(CONFIG_KVM)
+struct uv_info __bootdata_preserved(uv_info);
+#endif
void uv_query_info(void)
{
@@ -19,7 +25,21 @@ void uv_query_info(void)
if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100)
return;
+ if (IS_ENABLED(CONFIG_KVM)) {
+ memcpy(uv_info.inst_calls_list, uvcb.inst_calls_list, sizeof(uv_info.inst_calls_list));
+ uv_info.uv_base_stor_len = uvcb.uv_base_stor_len;
+ uv_info.guest_base_stor_len = uvcb.conf_base_phys_stor_len;
+ uv_info.guest_virt_base_stor_len = uvcb.conf_base_virt_stor_len;
+ uv_info.guest_virt_var_stor_len = uvcb.conf_virt_var_stor_len;
+ uv_info.guest_cpu_stor_len = uvcb.cpu_stor_len;
+ uv_info.max_sec_stor_addr = ALIGN(uvcb.max_guest_stor_addr, PAGE_SIZE);
+ uv_info.max_num_sec_conf = uvcb.max_num_sec_conf;
+ uv_info.max_guest_cpus = uvcb.max_guest_cpus;
+ }
+
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) &&
test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list))
prot_virt_guest = 1;
+#endif
}
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 37f96b6f0e61..a816fb4734b8 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -9,6 +9,7 @@
#ifndef _ASM_S390_GMAP_H
#define _ASM_S390_GMAP_H
+#include <linux/radix-tree.h>
#include <linux/refcount.h>
/* Generic bits for GMAP notification on DAT table entry changes. */
@@ -31,6 +32,7 @@
* @table: pointer to the page directory
* @asce: address space control element for gmap page table
* @pfault_enabled: defines if pfaults are applicable for the guest
+ * @guest_handle: protected virtual machine handle for the ultravisor
* @host_to_rmap: radix tree with gmap_rmap lists
* @children: list of shadow gmap structures
* @pt_list: list of all page tables used in the shadow guest address space
@@ -54,6 +56,8 @@ struct gmap {
unsigned long asce_end;
void *private;
bool pfault_enabled;
+ /* only set for protected virtual machines */
+ unsigned long guest_handle;
/* Additional data for shadow guest address spaces */
struct radix_tree_root host_to_rmap;
struct list_head children;
@@ -144,4 +148,6 @@ int gmap_mprotect_notify(struct gmap *, unsigned long start,
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
unsigned long gaddr, unsigned long vmaddr);
+int gmap_mark_unmergeable(void);
+void s390_reset_acc(struct mm_struct *mm);
#endif /* _ASM_S390_GMAP_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 1726224e7772..d6bcd34f3ec3 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -127,6 +127,12 @@ struct mcck_volatile_info {
#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \
CR14_EXTERNAL_DAMAGE_SUBMASK)
+#define SIDAD_SIZE_MASK 0xff
+#define sida_origin(sie_block) \
+ ((sie_block)->sidad & PAGE_MASK)
+#define sida_size(sie_block) \
+ ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE)
+
#define CPUSTAT_STOPPED 0x80000000
#define CPUSTAT_WAIT 0x10000000
#define CPUSTAT_ECALL_PEND 0x08000000
@@ -160,7 +166,13 @@ struct kvm_s390_sie_block {
__u8 reserved08[4]; /* 0x0008 */
#define PROG_IN_SIE (1<<0)
__u32 prog0c; /* 0x000c */
- __u8 reserved10[16]; /* 0x0010 */
+ union {
+ __u8 reserved10[16]; /* 0x0010 */
+ struct {
+ __u64 pv_handle_cpu;
+ __u64 pv_handle_config;
+ };
+ };
#define PROG_BLOCK_SIE (1<<0)
#define PROG_REQUEST (1<<1)
atomic_t prog20; /* 0x0020 */
@@ -209,10 +221,23 @@ struct kvm_s390_sie_block {
#define ICPT_PARTEXEC 0x38
#define ICPT_IOINST 0x40
#define ICPT_KSS 0x5c
+#define ICPT_MCHKREQ 0x60
+#define ICPT_INT_ENABLE 0x64
+#define ICPT_PV_INSTR 0x68
+#define ICPT_PV_NOTIFY 0x6c
+#define ICPT_PV_PREF 0x70
__u8 icptcode; /* 0x0050 */
__u8 icptstatus; /* 0x0051 */
__u16 ihcpu; /* 0x0052 */
- __u8 reserved54[2]; /* 0x0054 */
+ __u8 reserved54; /* 0x0054 */
+#define IICTL_CODE_NONE 0x00
+#define IICTL_CODE_MCHK 0x01
+#define IICTL_CODE_EXT 0x02
+#define IICTL_CODE_IO 0x03
+#define IICTL_CODE_RESTART 0x04
+#define IICTL_CODE_SPECIFICATION 0x10
+#define IICTL_CODE_OPERAND 0x11
+ __u8 iictl; /* 0x0055 */
__u16 ipa; /* 0x0056 */
__u32 ipb; /* 0x0058 */
__u32 scaoh; /* 0x005c */
@@ -233,7 +258,7 @@ struct kvm_s390_sie_block {
#define ECB3_RI 0x01
__u8 ecb3; /* 0x0063 */
__u32 scaol; /* 0x0064 */
- __u8 reserved68; /* 0x0068 */
+ __u8 sdf; /* 0x0068 */
__u8 epdx; /* 0x0069 */
__u8 reserved6a[2]; /* 0x006a */
__u32 todpr; /* 0x006c */
@@ -249,31 +274,58 @@ struct kvm_s390_sie_block {
#define HPID_KVM 0x4
#define HPID_VSIE 0x5
__u8 hpid; /* 0x00b8 */
- __u8 reservedb9[11]; /* 0x00b9 */
- __u16 extcpuaddr; /* 0x00c4 */
- __u16 eic; /* 0x00c6 */
+ __u8 reservedb9[7]; /* 0x00b9 */
+ union {
+ struct {
+ __u32 eiparams; /* 0x00c0 */
+ __u16 extcpuaddr; /* 0x00c4 */
+ __u16 eic; /* 0x00c6 */
+ };
+ __u64 mcic; /* 0x00c0 */
+ } __packed;
__u32 reservedc8; /* 0x00c8 */
- __u16 pgmilc; /* 0x00cc */
- __u16 iprcc; /* 0x00ce */
- __u32 dxc; /* 0x00d0 */
- __u16 mcn; /* 0x00d4 */
- __u8 perc; /* 0x00d6 */
- __u8 peratmid; /* 0x00d7 */
+ union {
+ struct {
+ __u16 pgmilc; /* 0x00cc */
+ __u16 iprcc; /* 0x00ce */
+ };
+ __u32 edc; /* 0x00cc */
+ } __packed;
+ union {
+ struct {
+ __u32 dxc; /* 0x00d0 */
+ __u16 mcn; /* 0x00d4 */
+ __u8 perc; /* 0x00d6 */
+ __u8 peratmid; /* 0x00d7 */
+ };
+ __u64 faddr; /* 0x00d0 */
+ } __packed;
__u64 peraddr; /* 0x00d8 */
__u8 eai; /* 0x00e0 */
__u8 peraid; /* 0x00e1 */
__u8 oai; /* 0x00e2 */
__u8 armid; /* 0x00e3 */
__u8 reservede4[4]; /* 0x00e4 */
- __u64 tecmc; /* 0x00e8 */
- __u8 reservedf0[12]; /* 0x00f0 */
+ union {
+ __u64 tecmc; /* 0x00e8 */
+ struct {
+ __u16 subchannel_id; /* 0x00e8 */
+ __u16 subchannel_nr; /* 0x00ea */
+ __u32 io_int_parm; /* 0x00ec */
+ __u32 io_int_word; /* 0x00f0 */
+ };
+ } __packed;
+ __u8 reservedf4[8]; /* 0x00f4 */
#define CRYCB_FORMAT_MASK 0x00000003
#define CRYCB_FORMAT0 0x00000000
#define CRYCB_FORMAT1 0x00000001
#define CRYCB_FORMAT2 0x00000003
__u32 crycbd; /* 0x00fc */
__u64 gcr[16]; /* 0x0100 */
- __u64 gbea; /* 0x0180 */
+ union {
+ __u64 gbea; /* 0x0180 */
+ __u64 sidad;
+ };
__u8 reserved188[8]; /* 0x0188 */
__u64 sdnxo; /* 0x0190 */
__u8 reserved198[8]; /* 0x0198 */
@@ -292,7 +344,7 @@ struct kvm_s390_sie_block {
__u64 itdba; /* 0x01e8 */
__u64 riccbd; /* 0x01f0 */
__u64 gvrd; /* 0x01f8 */
-} __attribute__((packed));
+} __packed __aligned(512);
struct kvm_s390_itdb {
__u8 data[256];
@@ -301,7 +353,9 @@ struct kvm_s390_itdb {
struct sie_page {
struct kvm_s390_sie_block sie_block;
struct mcck_volatile_info mcck_info; /* 0x0200 */
- __u8 reserved218[1000]; /* 0x0218 */
+ __u8 reserved218[360]; /* 0x0218 */
+ __u64 pv_grregs[16]; /* 0x0380 */
+ __u8 reserved400[512]; /* 0x0400 */
struct kvm_s390_itdb itdb; /* 0x0600 */
__u8 reserved700[2304]; /* 0x0700 */
};
@@ -476,6 +530,7 @@ enum irq_types {
IRQ_PEND_PFAULT_INIT,
IRQ_PEND_EXT_HOST,
IRQ_PEND_EXT_SERVICE,
+ IRQ_PEND_EXT_SERVICE_EV,
IRQ_PEND_EXT_TIMING,
IRQ_PEND_EXT_CPU_TIMER,
IRQ_PEND_EXT_CLOCK_COMP,
@@ -520,6 +575,7 @@ enum irq_types {
(1UL << IRQ_PEND_EXT_TIMING) | \
(1UL << IRQ_PEND_EXT_HOST) | \
(1UL << IRQ_PEND_EXT_SERVICE) | \
+ (1UL << IRQ_PEND_EXT_SERVICE_EV) | \
(1UL << IRQ_PEND_VIRTIO) | \
(1UL << IRQ_PEND_PFAULT_INIT) | \
(1UL << IRQ_PEND_PFAULT_DONE))
@@ -536,6 +592,13 @@ enum irq_types {
#define IRQ_PEND_MCHK_MASK ((1UL << IRQ_PEND_MCHK_REP) | \
(1UL << IRQ_PEND_MCHK_EX))
+#define IRQ_PEND_EXT_II_MASK ((1UL << IRQ_PEND_EXT_CPU_TIMER) | \
+ (1UL << IRQ_PEND_EXT_CLOCK_COMP) | \
+ (1UL << IRQ_PEND_EXT_EMERGENCY) | \
+ (1UL << IRQ_PEND_EXT_EXTERNAL) | \
+ (1UL << IRQ_PEND_EXT_SERVICE) | \
+ (1UL << IRQ_PEND_EXT_SERVICE_EV))
+
struct kvm_s390_interrupt_info {
struct list_head list;
u64 type;
@@ -594,6 +657,7 @@ struct kvm_s390_local_interrupt {
struct kvm_s390_float_interrupt {
unsigned long pending_irqs;
+ unsigned long masked_irqs;
spinlock_t lock;
struct list_head lists[FIRQ_LIST_COUNT];
int counters[FIRQ_MAX_COUNT];
@@ -645,6 +709,11 @@ struct kvm_guestdbg_info_arch {
unsigned long last_bp;
};
+struct kvm_s390_pv_vcpu {
+ u64 handle;
+ unsigned long stor_base;
+};
+
struct kvm_vcpu_arch {
struct kvm_s390_sie_block *sie_block;
/* if vsie is active, currently executed shadow sie control block */
@@ -673,6 +742,7 @@ struct kvm_vcpu_arch {
__u64 cputm_start;
bool gs_enabled;
bool skey_enabled;
+ struct kvm_s390_pv_vcpu pv;
};
struct kvm_vm_stat {
@@ -701,9 +771,6 @@ struct s390_io_adapter {
bool masked;
bool swap;
bool suppressible;
- struct rw_semaphore maps_lock;
- struct list_head maps;
- atomic_t nr_maps;
};
#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
@@ -846,6 +913,13 @@ struct kvm_s390_gisa_interrupt {
DECLARE_BITMAP(kicked_mask, KVM_MAX_VCPUS);
};
+struct kvm_s390_pv {
+ u64 handle;
+ u64 guest_len;
+ unsigned long stor_base;
+ void *stor_var;
+};
+
struct kvm_arch{
void *sca;
int use_esca;
@@ -881,6 +955,7 @@ struct kvm_arch{
DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
struct kvm_s390_gisa_interrupt gisa_int;
+ struct kvm_s390_pv pv;
};
#define KVM_HVA_ERR_BAD (-1UL)
@@ -921,7 +996,7 @@ static inline void kvm_arch_hardware_disable(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
- struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+ struct kvm_memory_slot *slot) {}
static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index bcfb6371086f..e21b618ad432 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -16,6 +16,8 @@ typedef struct {
unsigned long asce;
unsigned long asce_limit;
unsigned long vdso_base;
+ /* The mmu context belongs to a secure guest. */
+ atomic_t is_protected;
/*
* The following bitfields need a down_write on the mm
* semaphore when they are written to. As they are only
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 8d04e6f3f796..afa836014076 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -23,6 +23,7 @@ static inline int init_new_context(struct task_struct *tsk,
INIT_LIST_HEAD(&mm->context.gmap_list);
cpumask_clear(&mm->context.cpu_attach_mask);
atomic_set(&mm->context.flush_count, 0);
+ atomic_set(&mm->context.is_protected, 0);
mm->context.gmap_asce = 0;
mm->context.flush_mm = 0;
mm->context.compat_mm = test_thread_flag(TIF_31BIT);
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 1019efd85b9d..62440a82731a 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -153,6 +153,11 @@ static inline int devmem_is_allowed(unsigned long pfn)
#define HAVE_ARCH_FREE_PAGE
#define HAVE_ARCH_ALLOC_PAGE
+#if IS_ENABLED(CONFIG_PGSTE)
+int arch_make_page_accessible(struct page *page);
+#define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
+#endif
+
#endif /* !__ASSEMBLY__ */
#define __PAGE_OFFSET 0x0UL
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 6d7c3b7e9281..6076c8c912d2 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -19,6 +19,7 @@
#include <linux/atomic.h>
#include <asm/bug.h>
#include <asm/page.h>
+#include <asm/uv.h>
extern pgd_t swapper_pg_dir[];
extern void paging_init(void);
@@ -520,6 +521,15 @@ static inline int mm_has_pgste(struct mm_struct *mm)
return 0;
}
+static inline int mm_is_protected(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ if (unlikely(atomic_read(&mm->context.is_protected)))
+ return 1;
+#endif
+ return 0;
+}
+
static inline int mm_alloc_pgste(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
@@ -1067,7 +1077,12 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+ pte_t res;
+
+ res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+ if (mm_is_protected(mm) && pte_present(res))
+ uv_convert_from_secure(pte_val(res) & PAGE_MASK);
+ return res;
}
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
@@ -1079,7 +1094,12 @@ void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
- return ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
+ pte_t res;
+
+ res = ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
+ if (mm_is_protected(vma->vm_mm) && pte_present(res))
+ uv_convert_from_secure(pte_val(res) & PAGE_MASK);
+ return res;
}
/*
@@ -1094,12 +1114,17 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep, int full)
{
+ pte_t res;
+
if (full) {
- pte_t pte = *ptep;
+ res = *ptep;
*ptep = __pte(_PAGE_INVALID);
- return pte;
+ } else {
+ res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
}
- return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+ if (mm_is_protected(mm) && pte_present(res))
+ uv_convert_from_secure(pte_val(res) & PAGE_MASK);
+ return res;
}
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index 4093a2856929..cff4b4c99b75 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -14,23 +14,62 @@
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/bug.h>
+#include <linux/sched.h>
#include <asm/page.h>
+#include <asm/gmap.h>
#define UVC_RC_EXECUTED 0x0001
#define UVC_RC_INV_CMD 0x0002
#define UVC_RC_INV_STATE 0x0003
#define UVC_RC_INV_LEN 0x0005
#define UVC_RC_NO_RESUME 0x0007
+#define UVC_RC_NEED_DESTROY 0x8000
#define UVC_CMD_QUI 0x0001
+#define UVC_CMD_INIT_UV 0x000f
+#define UVC_CMD_CREATE_SEC_CONF 0x0100
+#define UVC_CMD_DESTROY_SEC_CONF 0x0101
+#define UVC_CMD_CREATE_SEC_CPU 0x0120
+#define UVC_CMD_DESTROY_SEC_CPU 0x0121
+#define UVC_CMD_CONV_TO_SEC_STOR 0x0200
+#define UVC_CMD_CONV_FROM_SEC_STOR 0x0201
+#define UVC_CMD_SET_SEC_CONF_PARAMS 0x0300
+#define UVC_CMD_UNPACK_IMG 0x0301
+#define UVC_CMD_VERIFY_IMG 0x0302
+#define UVC_CMD_CPU_RESET 0x0310
+#define UVC_CMD_CPU_RESET_INITIAL 0x0311
+#define UVC_CMD_PREPARE_RESET 0x0320
+#define UVC_CMD_CPU_RESET_CLEAR 0x0321
+#define UVC_CMD_CPU_SET_STATE 0x0330
+#define UVC_CMD_SET_UNSHARE_ALL 0x0340
+#define UVC_CMD_PIN_PAGE_SHARED 0x0341
+#define UVC_CMD_UNPIN_PAGE_SHARED 0x0342
#define UVC_CMD_SET_SHARED_ACCESS 0x1000
#define UVC_CMD_REMOVE_SHARED_ACCESS 0x1001
/* Bits in installed uv calls */
enum uv_cmds_inst {
BIT_UVC_CMD_QUI = 0,
+ BIT_UVC_CMD_INIT_UV = 1,
+ BIT_UVC_CMD_CREATE_SEC_CONF = 2,
+ BIT_UVC_CMD_DESTROY_SEC_CONF = 3,
+ BIT_UVC_CMD_CREATE_SEC_CPU = 4,
+ BIT_UVC_CMD_DESTROY_SEC_CPU = 5,
+ BIT_UVC_CMD_CONV_TO_SEC_STOR = 6,
+ BIT_UVC_CMD_CONV_FROM_SEC_STOR = 7,
BIT_UVC_CMD_SET_SHARED_ACCESS = 8,
BIT_UVC_CMD_REMOVE_SHARED_ACCESS = 9,
+ BIT_UVC_CMD_SET_SEC_PARMS = 11,
+ BIT_UVC_CMD_UNPACK_IMG = 13,
+ BIT_UVC_CMD_VERIFY_IMG = 14,
+ BIT_UVC_CMD_CPU_RESET = 15,
+ BIT_UVC_CMD_CPU_RESET_INITIAL = 16,
+ BIT_UVC_CMD_CPU_SET_STATE = 17,
+ BIT_UVC_CMD_PREPARE_RESET = 18,
+ BIT_UVC_CMD_CPU_PERFORM_CLEAR_RESET = 19,
+ BIT_UVC_CMD_UNSHARE_ALL = 20,
+ BIT_UVC_CMD_PIN_PAGE_SHARED = 21,
+ BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22,
};
struct uv_cb_header {
@@ -40,13 +79,127 @@ struct uv_cb_header {
u16 rrc; /* Return Reason Code */
} __packed __aligned(8);
+/* Query Ultravisor Information */
struct uv_cb_qui {
struct uv_cb_header header;
u64 reserved08;
u64 inst_calls_list[4];
- u64 reserved30[15];
+ u64 reserved30[2];
+ u64 uv_base_stor_len;
+ u64 reserved48;
+ u64 conf_base_phys_stor_len;
+ u64 conf_base_virt_stor_len;
+ u64 conf_virt_var_stor_len;
+ u64 cpu_stor_len;
+ u32 reserved70[3];
+ u32 max_num_sec_conf;
+ u64 max_guest_stor_addr;
+ u8 reserved88[158 - 136];
+ u16 max_guest_cpus;
+ u8 reserveda0[200 - 160];
} __packed __aligned(8);
+/* Initialize Ultravisor */
+struct uv_cb_init {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 stor_origin;
+ u64 stor_len;
+ u64 reserved28[4];
+} __packed __aligned(8);
+
+/* Create Guest Configuration */
+struct uv_cb_cgc {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 guest_handle;
+ u64 conf_base_stor_origin;
+ u64 conf_virt_stor_origin;
+ u64 reserved30;
+ u64 guest_stor_origin;
+ u64 guest_stor_len;
+ u64 guest_sca;
+ u64 guest_asce;
+ u64 reserved58[5];
+} __packed __aligned(8);
+
+/* Create Secure CPU */
+struct uv_cb_csc {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 cpu_handle;
+ u64 guest_handle;
+ u64 stor_origin;
+ u8 reserved30[6];
+ u16 num;
+ u64 state_origin;
+ u64 reserved40[4];
+} __packed __aligned(8);
+
+/* Convert to Secure */
+struct uv_cb_cts {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 guest_handle;
+ u64 gaddr;
+} __packed __aligned(8);
+
+/* Convert from Secure / Pin Page Shared */
+struct uv_cb_cfs {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 paddr;
+} __packed __aligned(8);
+
+/* Set Secure Config Parameter */
+struct uv_cb_ssc {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 guest_handle;
+ u64 sec_header_origin;
+ u32 sec_header_len;
+ u32 reserved2c;
+ u64 reserved30[4];
+} __packed __aligned(8);
+
+/* Unpack */
+struct uv_cb_unp {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 guest_handle;
+ u64 gaddr;
+ u64 tweak[2];
+ u64 reserved38[3];
+} __packed __aligned(8);
+
+#define PV_CPU_STATE_OPR 1
+#define PV_CPU_STATE_STP 2
+#define PV_CPU_STATE_CHKSTP 3
+#define PV_CPU_STATE_OPR_LOAD 5
+
+struct uv_cb_cpu_set_state {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 cpu_handle;
+ u8 reserved20[7];
+ u8 state;
+ u64 reserved28[5];
+};
+
+/*
+ * A common UV call struct for calls that take no payload
+ * Examples:
+ * Destroy cpu/config
+ * Verify
+ */
+struct uv_cb_nodata {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 handle;
+ u64 reserved20[4];
+} __packed __aligned(8);
+
+/* Set Shared Access */
struct uv_cb_share {
struct uv_cb_header header;
u64 reserved08[3];
@@ -54,21 +207,76 @@ struct uv_cb_share {
u64 reserved28;
} __packed __aligned(8);
-static inline int uv_call(unsigned long r1, unsigned long r2)
+static inline int __uv_call(unsigned long r1, unsigned long r2)
{
int cc;
asm volatile(
- "0: .insn rrf,0xB9A40000,%[r1],%[r2],0,0\n"
- " brc 3,0b\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
+ " .insn rrf,0xB9A40000,%[r1],%[r2],0,0\n"
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
: [cc] "=d" (cc)
: [r1] "a" (r1), [r2] "a" (r2)
: "memory", "cc");
return cc;
}
+static inline int uv_call(unsigned long r1, unsigned long r2)
+{
+ int cc;
+
+ do {
+ cc = __uv_call(r1, r2);
+ } while (cc > 1);
+ return cc;
+}
+
+/* Low level uv_call that avoids stalls for long running busy conditions */
+static inline int uv_call_sched(unsigned long r1, unsigned long r2)
+{
+ int cc;
+
+ do {
+ cc = __uv_call(r1, r2);
+ cond_resched();
+ } while (cc > 1);
+ return cc;
+}
+
+/*
+ * special variant of uv_call that only transports the cpu or guest
+ * handle and the command, like destroy or verify.
+ */
+static inline int uv_cmd_nodata(u64 handle, u16 cmd, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_nodata uvcb = {
+ .header.cmd = cmd,
+ .header.len = sizeof(uvcb),
+ .handle = handle,
+ };
+ int cc;
+
+ WARN(!handle, "No handle provided to Ultravisor call cmd %x\n", cmd);
+ cc = uv_call_sched(0, (u64)&uvcb);
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ return cc ? -EINVAL : 0;
+}
+
+struct uv_info {
+ unsigned long inst_calls_list[4];
+ unsigned long uv_base_stor_len;
+ unsigned long guest_base_stor_len;
+ unsigned long guest_virt_base_stor_len;
+ unsigned long guest_virt_var_stor_len;
+ unsigned long guest_cpu_stor_len;
+ unsigned long max_sec_stor_addr;
+ unsigned int max_num_sec_conf;
+ unsigned short max_guest_cpus;
+};
+
+extern struct uv_info uv_info;
+
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
extern int prot_virt_guest;
@@ -121,11 +329,40 @@ static inline int uv_remove_shared(unsigned long addr)
return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS);
}
-void uv_query_info(void);
#else
#define is_prot_virt_guest() 0
static inline int uv_set_shared(unsigned long addr) { return 0; }
static inline int uv_remove_shared(unsigned long addr) { return 0; }
+#endif
+
+#if IS_ENABLED(CONFIG_KVM)
+extern int prot_virt_host;
+
+static inline int is_prot_virt_host(void)
+{
+ return prot_virt_host;
+}
+
+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
+int uv_convert_from_secure(unsigned long paddr);
+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
+
+void setup_uv(void);
+void adjust_to_uv_max(unsigned long *vmax);
+#else
+#define is_prot_virt_host() 0
+static inline void setup_uv(void) {}
+static inline void adjust_to_uv_max(unsigned long *vmax) {}
+
+static inline int uv_convert_from_secure(unsigned long paddr)
+{
+ return 0;
+}
+#endif
+
+#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
+void uv_query_info(void);
+#else
static inline void uv_query_info(void) {}
#endif
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 578a6fa82ea4..33d4de233c5b 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -78,6 +78,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_diag.o
obj-$(CONFIG_TRACEPOINTS) += trace.o
+obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
# vdso
obj-y += vdso64/
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 1d3927e01a5f..faca269d5f27 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -24,6 +24,8 @@ asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
void do_protection_exception(struct pt_regs *regs);
void do_dat_exception(struct pt_regs *regs);
+void do_secure_storage_access(struct pt_regs *regs);
+void do_non_secure_storage_access(struct pt_regs *regs);
void addressing_exception(struct pt_regs *regs);
void data_exception(struct pt_regs *regs);
diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S
index eee3a482195a..2c27907a5ffc 100644
--- a/arch/s390/kernel/pgm_check.S
+++ b/arch/s390/kernel/pgm_check.S
@@ -78,8 +78,8 @@ PGM_CHECK(do_dat_exception) /* 39 */
PGM_CHECK(do_dat_exception) /* 3a */
PGM_CHECK(do_dat_exception) /* 3b */
PGM_CHECK_DEFAULT /* 3c */
-PGM_CHECK_DEFAULT /* 3d */
-PGM_CHECK_DEFAULT /* 3e */
+PGM_CHECK(do_secure_storage_access) /* 3d */
+PGM_CHECK(do_non_secure_storage_access) /* 3e */
PGM_CHECK_DEFAULT /* 3f */
PGM_CHECK(monitor_event_exception) /* 40 */
PGM_CHECK_DEFAULT /* 41 */
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index b2c2f75860e8..1423090a2259 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -92,10 +92,6 @@ char elf_platform[ELF_PLATFORM_SIZE];
unsigned long int_hwcap = 0;
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
-int __bootdata_preserved(prot_virt_guest);
-#endif
-
int __bootdata(noexec_disabled);
int __bootdata(memory_end_set);
unsigned long __bootdata(memory_end);
@@ -564,6 +560,9 @@ static void __init setup_memory_end(void)
vmax = _REGION1_SIZE; /* 4-level kernel page table */
}
+ if (is_prot_virt_host())
+ adjust_to_uv_max(&vmax);
+
/* module area is at the end of the kernel address space. */
MODULES_END = vmax;
MODULES_VADDR = MODULES_END - MODULES_LEN;
@@ -1138,6 +1137,8 @@ void __init setup_arch(char **cmdline_p)
*/
memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT));
+ if (is_prot_virt_host())
+ setup_uv();
setup_memory_end();
setup_memory();
dma_contiguous_reserve(memory_end);
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
new file mode 100644
index 000000000000..c86d654351d1
--- /dev/null
+++ b/arch/s390/kernel/uv.c
@@ -0,0 +1,414 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common Ultravisor functions and initialization
+ *
+ * Copyright IBM Corp. 2019, 2020
+ */
+#define KMSG_COMPONENT "prot_virt"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/bitmap.h>
+#include <linux/memblock.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <asm/facility.h>
+#include <asm/sections.h>
+#include <asm/uv.h>
+
+/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
+int __bootdata_preserved(prot_virt_guest);
+#endif
+
+#if IS_ENABLED(CONFIG_KVM)
+int prot_virt_host;
+EXPORT_SYMBOL(prot_virt_host);
+struct uv_info __bootdata_preserved(uv_info);
+EXPORT_SYMBOL(uv_info);
+
+static int __init prot_virt_setup(char *val)
+{
+ bool enabled;
+ int rc;
+
+ rc = kstrtobool(val, &enabled);
+ if (!rc && enabled)
+ prot_virt_host = 1;
+
+ if (is_prot_virt_guest() && prot_virt_host) {
+ prot_virt_host = 0;
+ pr_warn("Protected virtualization not available in protected guests.");
+ }
+
+ if (prot_virt_host && !test_facility(158)) {
+ prot_virt_host = 0;
+ pr_warn("Protected virtualization not supported by the hardware.");
+ }
+
+ return rc;
+}
+early_param("prot_virt", prot_virt_setup);
+
+static int __init uv_init(unsigned long stor_base, unsigned long stor_len)
+{
+ struct uv_cb_init uvcb = {
+ .header.cmd = UVC_CMD_INIT_UV,
+ .header.len = sizeof(uvcb),
+ .stor_origin = stor_base,
+ .stor_len = stor_len,
+ };
+
+ if (uv_call(0, (uint64_t)&uvcb)) {
+ pr_err("Ultravisor init failed with rc: 0x%x rrc: 0%x\n",
+ uvcb.header.rc, uvcb.header.rrc);
+ return -1;
+ }
+ return 0;
+}
+
+void __init setup_uv(void)
+{
+ unsigned long uv_stor_base;
+
+ uv_stor_base = (unsigned long)memblock_alloc_try_nid(
+ uv_info.uv_base_stor_len, SZ_1M, SZ_2G,
+ MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE);
+ if (!uv_stor_base) {
+ pr_warn("Failed to reserve %lu bytes for ultravisor base storage\n",
+ uv_info.uv_base_stor_len);
+ goto fail;
+ }
+
+ if (uv_init(uv_stor_base, uv_info.uv_base_stor_len)) {
+ memblock_free(uv_stor_base, uv_info.uv_base_stor_len);
+ goto fail;
+ }
+
+ pr_info("Reserving %luMB as ultravisor base storage\n",
+ uv_info.uv_base_stor_len >> 20);
+ return;
+fail:
+ pr_info("Disabling support for protected virtualization");
+ prot_virt_host = 0;
+}
+
+void adjust_to_uv_max(unsigned long *vmax)
+{
+ *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr);
+}
+
+/*
+ * Requests the Ultravisor to pin the page in the shared state. This will
+ * cause an intercept when the guest attempts to unshare the pinned page.
+ */
+static int uv_pin_shared(unsigned long paddr)
+{
+ struct uv_cb_cfs uvcb = {
+ .header.cmd = UVC_CMD_PIN_PAGE_SHARED,
+ .header.len = sizeof(uvcb),
+ .paddr = paddr,
+ };
+
+ if (uv_call(0, (u64)&uvcb))
+ return -EINVAL;
+ return 0;
+}
+
+/*
+ * Requests the Ultravisor to encrypt a guest page and make it
+ * accessible to the host for paging (export).
+ *
+ * @paddr: Absolute host address of page to be exported
+ */
+int uv_convert_from_secure(unsigned long paddr)
+{
+ struct uv_cb_cfs uvcb = {
+ .header.cmd = UVC_CMD_CONV_FROM_SEC_STOR,
+ .header.len = sizeof(uvcb),
+ .paddr = paddr
+ };
+
+ if (uv_call(0, (u64)&uvcb))
+ return -EINVAL;
+ return 0;
+}
+
+/*
+ * Calculate the expected ref_count for a page that would otherwise have no
+ * further pins. This was cribbed from similar functions in other places in
+ * the kernel, but with some slight modifications. We know that a secure
+ * page can not be a huge page for example.
+ */
+static int expected_page_refs(struct page *page)
+{
+ int res;
+
+ res = page_mapcount(page);
+ if (PageSwapCache(page)) {
+ res++;
+ } else if (page_mapping(page)) {
+ res++;
+ if (page_has_private(page))
+ res++;
+ }
+ return res;
+}
+
+static int make_secure_pte(pte_t *ptep, unsigned long addr,
+ struct page *exp_page, struct uv_cb_header *uvcb)
+{
+ pte_t entry = READ_ONCE(*ptep);
+ struct page *page;
+ int expected, rc = 0;
+
+ if (!pte_present(entry))
+ return -ENXIO;
+ if (pte_val(entry) & _PAGE_INVALID)
+ return -ENXIO;
+
+ page = pte_page(entry);
+ if (page != exp_page)
+ return -ENXIO;
+ if (PageWriteback(page))
+ return -EAGAIN;
+ expected = expected_page_refs(page);
+ if (!page_ref_freeze(page, expected))
+ return -EBUSY;
+ set_bit(PG_arch_1, &page->flags);
+ rc = uv_call(0, (u64)uvcb);
+ page_ref_unfreeze(page, expected);
+ /* Return -ENXIO if the page was not mapped, -EINVAL otherwise */
+ if (rc)
+ rc = uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
+ return rc;
+}
+
+/*
+ * Requests the Ultravisor to make a page accessible to a guest.
+ * If it's brought in the first time, it will be cleared. If
+ * it has been exported before, it will be decrypted and integrity
+ * checked.
+ */
+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
+{
+ struct vm_area_struct *vma;
+ bool local_drain = false;
+ spinlock_t *ptelock;
+ unsigned long uaddr;
+ struct page *page;
+ pte_t *ptep;
+ int rc;
+
+again:
+ rc = -EFAULT;
+ down_read(&gmap->mm->mmap_sem);
+
+ uaddr = __gmap_translate(gmap, gaddr);
+ if (IS_ERR_VALUE(uaddr))
+ goto out;
+ vma = find_vma(gmap->mm, uaddr);
+ if (!vma)
+ goto out;
+ /*
+ * Secure pages cannot be huge and userspace should not combine both.
+ * In case userspace does it anyway this will result in an -EFAULT for
+ * the unpack. The guest is thus never reaching secure mode. If
+ * userspace is playing dirty tricky with mapping huge pages later
+ * on this will result in a segmentation fault.
+ */
+ if (is_vm_hugetlb_page(vma))
+ goto out;
+
+ rc = -ENXIO;
+ page = follow_page(vma, uaddr, FOLL_WRITE);
+ if (IS_ERR_OR_NULL(page))
+ goto out;
+
+ lock_page(page);
+ ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
+ rc = make_secure_pte(ptep, uaddr, page, uvcb);
+ pte_unmap_unlock(ptep, ptelock);
+ unlock_page(page);
+out:
+ up_read(&gmap->mm->mmap_sem);
+
+ if (rc == -EAGAIN) {
+ wait_on_page_writeback(page);
+ } else if (rc == -EBUSY) {
+ /*
+ * If we have tried a local drain and the page refcount
+ * still does not match our expected safe value, try with a
+ * system wide drain. This is needed if the pagevecs holding
+ * the page are on a different CPU.
+ */
+ if (local_drain) {
+ lru_add_drain_all();
+ /* We give up here, and let the caller try again */
+ return -EAGAIN;
+ }
+ /*
+ * We are here if the page refcount does not match the
+ * expected safe value. The main culprits are usually
+ * pagevecs. With lru_add_drain() we drain the pagevecs
+ * on the local CPU so that hopefully the refcount will
+ * reach the expected safe value.
+ */
+ lru_add_drain();
+ local_drain = true;
+ /* And now we try again immediately after draining */
+ goto again;
+ } else if (rc == -ENXIO) {
+ if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
+ return -EFAULT;
+ return -EAGAIN;
+ }
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_make_secure);
+
+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
+{
+ struct uv_cb_cts uvcb = {
+ .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
+ .header.len = sizeof(uvcb),
+ .guest_handle = gmap->guest_handle,
+ .gaddr = gaddr,
+ };
+
+ return gmap_make_secure(gmap, gaddr, &uvcb);
+}
+EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
+
+/*
+ * To be called with the page locked or with an extra reference! This will
+ * prevent gmap_make_secure from touching the page concurrently. Having 2
+ * parallel make_page_accessible is fine, as the UV calls will become a
+ * no-op if the page is already exported.
+ */
+int arch_make_page_accessible(struct page *page)
+{
+ int rc = 0;
+
+ /* Hugepage cannot be protected, so nothing to do */
+ if (PageHuge(page))
+ return 0;
+
+ /*
+ * PG_arch_1 is used in 3 places:
+ * 1. for kernel page tables during early boot
+ * 2. for storage keys of huge pages and KVM
+ * 3. As an indication that this page might be secure. This can
+ * overindicate, e.g. we set the bit before calling
+ * convert_to_secure.
+ * As secure pages are never huge, all 3 variants can co-exists.
+ */
+ if (!test_bit(PG_arch_1, &page->flags))
+ return 0;
+
+ rc = uv_pin_shared(page_to_phys(page));
+ if (!rc) {
+ clear_bit(PG_arch_1, &page->flags);
+ return 0;
+ }
+
+ rc = uv_convert_from_secure(page_to_phys(page));
+ if (!rc) {
+ clear_bit(PG_arch_1, &page->flags);
+ return 0;
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(arch_make_page_accessible);
+
+#endif
+
+#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
+static ssize_t uv_query_facilities(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n",
+ uv_info.inst_calls_list[0],
+ uv_info.inst_calls_list[1],
+ uv_info.inst_calls_list[2],
+ uv_info.inst_calls_list[3]);
+}
+
+static struct kobj_attribute uv_query_facilities_attr =
+ __ATTR(facilities, 0444, uv_query_facilities, NULL);
+
+static ssize_t uv_query_max_guest_cpus(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%d\n",
+ uv_info.max_guest_cpus);
+}
+
+static struct kobj_attribute uv_query_max_guest_cpus_attr =
+ __ATTR(max_cpus, 0444, uv_query_max_guest_cpus, NULL);
+
+static ssize_t uv_query_max_guest_vms(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%d\n",
+ uv_info.max_num_sec_conf);
+}
+
+static struct kobj_attribute uv_query_max_guest_vms_attr =
+ __ATTR(max_guests, 0444, uv_query_max_guest_vms, NULL);
+
+static ssize_t uv_query_max_guest_addr(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%lx\n",
+ uv_info.max_sec_stor_addr);
+}
+
+static struct kobj_attribute uv_query_max_guest_addr_attr =
+ __ATTR(max_address, 0444, uv_query_max_guest_addr, NULL);
+
+static struct attribute *uv_query_attrs[] = {
+ &uv_query_facilities_attr.attr,
+ &uv_query_max_guest_cpus_attr.attr,
+ &uv_query_max_guest_vms_attr.attr,
+ &uv_query_max_guest_addr_attr.attr,
+ NULL,
+};
+
+static struct attribute_group uv_query_attr_group = {
+ .attrs = uv_query_attrs,
+};
+
+static struct kset *uv_query_kset;
+static struct kobject *uv_kobj;
+
+static int __init uv_info_init(void)
+{
+ int rc = -ENOMEM;
+
+ if (!test_facility(158))
+ return 0;
+
+ uv_kobj = kobject_create_and_add("uv", firmware_kobj);
+ if (!uv_kobj)
+ return -ENOMEM;
+
+ uv_query_kset = kset_create_and_add("query", NULL, uv_kobj);
+ if (!uv_query_kset)
+ goto out_kobj;
+
+ rc = sysfs_create_group(&uv_query_kset->kobj, &uv_query_attr_group);
+ if (!rc)
+ return 0;
+
+ kset_unregister(uv_query_kset);
+out_kobj:
+ kobject_del(uv_kobj);
+ kobject_put(uv_kobj);
+ return rc;
+}
+device_initcall(uv_info_init);
+#endif
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 05ee90a5ea08..12decca22e7c 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -9,6 +9,6 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqch
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-objs += diag.o gaccess.o guestdbg.o vsie.o
+kvm-objs += diag.o gaccess.o guestdbg.o vsie.o pv.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 3fb54ec2cf3e..563429dece03 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -2,7 +2,7 @@
/*
* handling diagnose instructions
*
- * Copyright IBM Corp. 2008, 2011
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -201,6 +201,10 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
+ /*
+ * no need to check the return value of vcpu_stop as it can only have
+ * an error for protvirt, but protvirt means user cpu state
+ */
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
kvm_s390_vcpu_stop(vcpu);
vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 07d30ffcfa41..47a67a958107 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -505,7 +505,7 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
switch (prot) {
case PROT_TYPE_IEP:
tec->b61 = 1;
- /* FALL THROUGH */
+ fallthrough;
case PROT_TYPE_LA:
tec->b56 = 1;
break;
@@ -514,12 +514,12 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
break;
case PROT_TYPE_ALC:
tec->b60 = 1;
- /* FALL THROUGH */
+ fallthrough;
case PROT_TYPE_DAT:
tec->b61 = 1;
break;
}
- /* FALL THROUGH */
+ fallthrough;
case PGM_ASCE_TYPE:
case PGM_PAGE_TRANSLATION:
case PGM_REGION_FIRST_TRANS:
@@ -534,7 +534,7 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
tec->addr = gva >> PAGE_SHIFT;
tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
- /* FALL THROUGH */
+ fallthrough;
case PGM_ALEN_TRANSLATION:
case PGM_ALE_SEQUENCE:
case PGM_ASTE_VALIDITY:
@@ -677,7 +677,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
dat_protection |= rfte.p;
ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
}
- /* fallthrough */
+ fallthrough;
case ASCE_TYPE_REGION2: {
union region2_table_entry rste;
@@ -695,7 +695,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
dat_protection |= rste.p;
ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
}
- /* fallthrough */
+ fallthrough;
case ASCE_TYPE_REGION3: {
union region3_table_entry rtte;
@@ -723,7 +723,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
dat_protection |= rtte.fc0.p;
ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
}
- /* fallthrough */
+ fallthrough;
case ASCE_TYPE_SEGMENT: {
union segment_table_entry ste;
@@ -1050,7 +1050,8 @@ shadow_r2t:
rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
if (rc)
return rc;
- } /* fallthrough */
+ }
+ fallthrough;
case ASCE_TYPE_REGION2: {
union region2_table_entry rste;
@@ -1076,7 +1077,8 @@ shadow_r3t:
rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
if (rc)
return rc;
- } /* fallthrough */
+ }
+ fallthrough;
case ASCE_TYPE_REGION3: {
union region3_table_entry rtte;
@@ -1111,7 +1113,8 @@ shadow_sgt:
rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
if (rc)
return rc;
- } /* fallthrough */
+ }
+ fallthrough;
case ASCE_TYPE_SEGMENT: {
union segment_table_entry ste;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index a389fa85cca2..e7a7c499a73f 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -2,7 +2,7 @@
/*
* in-kernel handling for sie intercepts
*
- * Copyright IBM Corp. 2008, 2014
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -12,10 +12,10 @@
#include <linux/errno.h>
#include <linux/pagemap.h>
-#include <asm/kvm_host.h>
#include <asm/asm-offsets.h>
#include <asm/irq.h>
#include <asm/sysinfo.h>
+#include <asm/uv.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -79,6 +79,10 @@ static int handle_stop(struct kvm_vcpu *vcpu)
return rc;
}
+ /*
+ * no need to check the return value of vcpu_stop as it can only have
+ * an error for protvirt, but protvirt means user cpu state
+ */
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
kvm_s390_vcpu_stop(vcpu);
return -EOPNOTSUPP;
@@ -231,6 +235,13 @@ static int handle_prog(struct kvm_vcpu *vcpu)
vcpu->stat.exit_program_interruption++;
+ /*
+ * Intercept 8 indicates a loop of specification exceptions
+ * for protected guests.
+ */
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ return -EOPNOTSUPP;
+
if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
rc = kvm_s390_handle_per_event(vcpu);
if (rc)
@@ -384,7 +395,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
goto out;
}
- if (addr & ~PAGE_MASK)
+ if (!kvm_s390_pv_cpu_is_protected(vcpu) && (addr & ~PAGE_MASK))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
sctns = (void *)get_zeroed_page(GFP_KERNEL);
@@ -395,10 +406,15 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
out:
if (!cc) {
- r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
- if (r) {
- free_page((unsigned long)sctns);
- return kvm_s390_inject_prog_cond(vcpu, r);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ memcpy((void *)(sida_origin(vcpu->arch.sie_block)),
+ sctns, PAGE_SIZE);
+ } else {
+ r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
+ if (r) {
+ free_page((unsigned long)sctns);
+ return kvm_s390_inject_prog_cond(vcpu, r);
+ }
}
}
@@ -444,6 +460,77 @@ static int handle_operexc(struct kvm_vcpu *vcpu)
return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
}
+static int handle_pv_spx(struct kvm_vcpu *vcpu)
+{
+ u32 pref = *(u32 *)vcpu->arch.sie_block->sidad;
+
+ kvm_s390_set_prefix(vcpu, pref);
+ trace_kvm_s390_handle_prefix(vcpu, 1, pref);
+ return 0;
+}
+
+static int handle_pv_sclp(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+
+ spin_lock(&fi->lock);
+ /*
+ * 2 cases:
+ * a: an sccb answering interrupt was already pending or in flight.
+ * As the sccb value is not known we can simply set some value to
+ * trigger delivery of a saved SCCB. UV will then use its saved
+ * copy of the SCCB value.
+ * b: an error SCCB interrupt needs to be injected so we also inject
+ * a fake SCCB address. Firmware will use the proper one.
+ * This makes sure, that both errors and real sccb returns will only
+ * be delivered after a notification intercept (instruction has
+ * finished) but not after others.
+ */
+ fi->srv_signal.ext_params |= 0x43000;
+ set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+ clear_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs);
+ spin_unlock(&fi->lock);
+ return 0;
+}
+
+static int handle_pv_uvc(struct kvm_vcpu *vcpu)
+{
+ struct uv_cb_share *guest_uvcb = (void *)vcpu->arch.sie_block->sidad;
+ struct uv_cb_cts uvcb = {
+ .header.cmd = UVC_CMD_UNPIN_PAGE_SHARED,
+ .header.len = sizeof(uvcb),
+ .guest_handle = kvm_s390_pv_get_handle(vcpu->kvm),
+ .gaddr = guest_uvcb->paddr,
+ };
+ int rc;
+
+ if (guest_uvcb->header.cmd != UVC_CMD_REMOVE_SHARED_ACCESS) {
+ WARN_ONCE(1, "Unexpected notification intercept for UVC 0x%x\n",
+ guest_uvcb->header.cmd);
+ return 0;
+ }
+ rc = gmap_make_secure(vcpu->arch.gmap, uvcb.gaddr, &uvcb);
+ /*
+ * If the unpin did not succeed, the guest will exit again for the UVC
+ * and we will retry the unpin.
+ */
+ if (rc == -EINVAL)
+ return 0;
+ return rc;
+}
+
+static int handle_pv_notification(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.sie_block->ipa == 0xb210)
+ return handle_pv_spx(vcpu);
+ if (vcpu->arch.sie_block->ipa == 0xb220)
+ return handle_pv_sclp(vcpu);
+ if (vcpu->arch.sie_block->ipa == 0xb9a4)
+ return handle_pv_uvc(vcpu);
+
+ return handle_instruction(vcpu);
+}
+
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
{
int rc, per_rc = 0;
@@ -480,6 +567,28 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
case ICPT_KSS:
rc = kvm_s390_skey_check_enable(vcpu);
break;
+ case ICPT_MCHKREQ:
+ case ICPT_INT_ENABLE:
+ /*
+ * PSW bit 13 or a CR (0, 6, 14) changed and we might
+ * now be able to deliver interrupts. The pre-run code
+ * will take care of this.
+ */
+ rc = 0;
+ break;
+ case ICPT_PV_INSTR:
+ rc = handle_instruction(vcpu);
+ break;
+ case ICPT_PV_NOTIFY:
+ rc = handle_pv_notification(vcpu);
+ break;
+ case ICPT_PV_PREF:
+ rc = 0;
+ gmap_convert_to_secure(vcpu->arch.gmap,
+ kvm_s390_get_prefix(vcpu));
+ gmap_convert_to_secure(vcpu->arch.gmap,
+ kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
+ break;
default:
return -EOPNOTSUPP;
}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index c06c89d370a7..8191106bf7b9 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2,7 +2,7 @@
/*
* handling kvm guest interrupts
*
- * Copyright IBM Corp. 2008, 2015
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
*/
@@ -324,8 +324,11 @@ static inline int gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu)
{
- return vcpu->kvm->arch.float_int.pending_irqs |
- vcpu->arch.local_int.pending_irqs;
+ unsigned long pending = vcpu->kvm->arch.float_int.pending_irqs |
+ vcpu->arch.local_int.pending_irqs;
+
+ pending &= ~vcpu->kvm->arch.float_int.masked_irqs;
+ return pending;
}
static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
@@ -383,10 +386,18 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
__clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask);
if (!(vcpu->arch.sie_block->gcr[0] & CR0_CPU_TIMER_SUBMASK))
__clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask);
- if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
+ if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) {
__clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
+ __clear_bit(IRQ_PEND_EXT_SERVICE_EV, &active_mask);
+ }
if (psw_mchk_disabled(vcpu))
active_mask &= ~IRQ_PEND_MCHK_MASK;
+ /* PV guest cpus can have a single interruption injected at a time. */
+ if (kvm_s390_pv_cpu_is_protected(vcpu) &&
+ vcpu->arch.sie_block->iictl != IICTL_CODE_NONE)
+ active_mask &= ~(IRQ_PEND_EXT_II_MASK |
+ IRQ_PEND_IO_MASK |
+ IRQ_PEND_MCHK_MASK);
/*
* Check both floating and local interrupt's cr14 because
* bit IRQ_PEND_MCHK_REP could be set in both cases.
@@ -479,19 +490,23 @@ static void set_intercept_indicators(struct kvm_vcpu *vcpu)
static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- int rc;
+ int rc = 0;
vcpu->stat.deliver_cputm++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
0, 0);
-
- rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
- (u16 *)__LC_EXT_INT_CODE);
- rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
- rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+ vcpu->arch.sie_block->eic = EXT_IRQ_CPU_TIMER;
+ } else {
+ rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
+ (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ }
clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
return rc ? -EFAULT : 0;
}
@@ -499,19 +514,23 @@ static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- int rc;
+ int rc = 0;
vcpu->stat.deliver_ckc++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
0, 0);
-
- rc = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
- (u16 __user *)__LC_EXT_INT_CODE);
- rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
- rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+ vcpu->arch.sie_block->eic = EXT_IRQ_CLK_COMP;
+ } else {
+ rc = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
+ (u16 __user *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ }
clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
return rc ? -EFAULT : 0;
}
@@ -553,6 +572,20 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
union mci mci;
int rc;
+ /*
+ * All other possible payload for a machine check (e.g. the register
+ * contents in the save area) will be handled by the ultravisor, as
+ * the hypervisor does not not have the needed information for
+ * protected guests.
+ */
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_MCHK;
+ vcpu->arch.sie_block->mcic = mchk->mcic;
+ vcpu->arch.sie_block->faddr = mchk->failing_storage_address;
+ vcpu->arch.sie_block->edc = mchk->ext_damage_code;
+ return 0;
+ }
+
mci.val = mchk->mcic;
/* take care of lazy register loading */
save_fpu_regs();
@@ -696,17 +729,21 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- int rc;
+ int rc = 0;
VCPU_EVENT(vcpu, 3, "%s", "deliver: cpu restart");
vcpu->stat.deliver_restart_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
- rc = write_guest_lc(vcpu,
- offsetof(struct lowcore, restart_old_psw),
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, offsetof(struct lowcore, restart_psw),
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_RESTART;
+ } else {
+ rc = write_guest_lc(vcpu,
+ offsetof(struct lowcore, restart_old_psw),
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, offsetof(struct lowcore, restart_psw),
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ }
clear_bit(IRQ_PEND_RESTART, &li->pending_irqs);
return rc ? -EFAULT : 0;
}
@@ -748,6 +785,12 @@ static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu)
vcpu->stat.deliver_emergency_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
cpu_addr, 0);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+ vcpu->arch.sie_block->eic = EXT_IRQ_EMERGENCY_SIG;
+ vcpu->arch.sie_block->extcpuaddr = cpu_addr;
+ return 0;
+ }
rc = put_guest_lc(vcpu, EXT_IRQ_EMERGENCY_SIG,
(u16 *)__LC_EXT_INT_CODE);
@@ -776,6 +819,12 @@ static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
KVM_S390_INT_EXTERNAL_CALL,
extcall.code, 0);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+ vcpu->arch.sie_block->eic = EXT_IRQ_EXTERNAL_CALL;
+ vcpu->arch.sie_block->extcpuaddr = extcall.code;
+ return 0;
+ }
rc = put_guest_lc(vcpu, EXT_IRQ_EXTERNAL_CALL,
(u16 *)__LC_EXT_INT_CODE);
@@ -787,6 +836,21 @@ static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
return rc ? -EFAULT : 0;
}
+static int __deliver_prog_pv(struct kvm_vcpu *vcpu, u16 code)
+{
+ switch (code) {
+ case PGM_SPECIFICATION:
+ vcpu->arch.sie_block->iictl = IICTL_CODE_SPECIFICATION;
+ break;
+ case PGM_OPERAND:
+ vcpu->arch.sie_block->iictl = IICTL_CODE_OPERAND;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -807,6 +871,10 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
pgm_info.code, 0);
+ /* PER is handled by the ultravisor */
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ return __deliver_prog_pv(vcpu, pgm_info.code & ~PGM_PER);
+
switch (pgm_info.code & ~PGM_PER) {
case PGM_AFX_TRANSLATION:
case PGM_ASX_TRANSLATION:
@@ -818,7 +886,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
case PGM_PRIMARY_AUTHORITY:
case PGM_SECONDARY_AUTHORITY:
nullifying = true;
- /* fall through */
+ fallthrough;
case PGM_SPACE_SWITCH:
rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
(u64 *)__LC_TRANS_EXC_CODE);
@@ -902,20 +970,49 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
return rc ? -EFAULT : 0;
}
+#define SCCB_MASK 0xFFFFFFF8
+#define SCCB_EVENT_PENDING 0x3
+
+static int write_sclp(struct kvm_vcpu *vcpu, u32 parm)
+{
+ int rc;
+
+ if (kvm_s390_pv_cpu_get_handle(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+ vcpu->arch.sie_block->eic = EXT_IRQ_SERVICE_SIG;
+ vcpu->arch.sie_block->eiparams = parm;
+ return 0;
+ }
+
+ rc = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, parm,
+ (u32 *)__LC_EXT_PARAMS);
+
+ return rc ? -EFAULT : 0;
+}
+
static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
{
struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
struct kvm_s390_ext_info ext;
- int rc = 0;
spin_lock(&fi->lock);
- if (!(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) {
+ if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs) ||
+ !(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) {
spin_unlock(&fi->lock);
return 0;
}
ext = fi->srv_signal;
memset(&fi->srv_signal, 0, sizeof(ext));
clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+ clear_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ set_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs);
spin_unlock(&fi->lock);
VCPU_EVENT(vcpu, 4, "deliver: sclp parameter 0x%x",
@@ -924,16 +1021,31 @@ static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
ext.ext_params, 0);
- rc = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
- rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
- rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= put_guest_lc(vcpu, ext.ext_params,
- (u32 *)__LC_EXT_PARAMS);
+ return write_sclp(vcpu, ext.ext_params);
+}
- return rc ? -EFAULT : 0;
+static int __must_check __deliver_service_ev(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct kvm_s390_ext_info ext;
+
+ spin_lock(&fi->lock);
+ if (!(test_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs))) {
+ spin_unlock(&fi->lock);
+ return 0;
+ }
+ ext = fi->srv_signal;
+ /* only clear the event bit */
+ fi->srv_signal.ext_params &= ~SCCB_EVENT_PENDING;
+ clear_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
+ spin_unlock(&fi->lock);
+
+ VCPU_EVENT(vcpu, 4, "%s", "deliver: sclp parameter event");
+ vcpu->stat.deliver_service_signal++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
+ ext.ext_params, 0);
+
+ return write_sclp(vcpu, SCCB_EVENT_PENDING);
}
static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
@@ -1028,6 +1140,15 @@ static int __do_deliver_io(struct kvm_vcpu *vcpu, struct kvm_s390_io_info *io)
{
int rc;
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_IO;
+ vcpu->arch.sie_block->subchannel_id = io->subchannel_id;
+ vcpu->arch.sie_block->subchannel_nr = io->subchannel_nr;
+ vcpu->arch.sie_block->io_int_parm = io->io_int_parm;
+ vcpu->arch.sie_block->io_int_word = io->io_int_word;
+ return 0;
+ }
+
rc = put_guest_lc(vcpu, io->subchannel_id, (u16 *)__LC_SUBCHANNEL_ID);
rc |= put_guest_lc(vcpu, io->subchannel_nr, (u16 *)__LC_SUBCHANNEL_NR);
rc |= put_guest_lc(vcpu, io->io_int_parm, (u32 *)__LC_IO_INT_PARM);
@@ -1329,6 +1450,9 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
case IRQ_PEND_EXT_SERVICE:
rc = __deliver_service(vcpu);
break;
+ case IRQ_PEND_EXT_SERVICE_EV:
+ rc = __deliver_service_ev(vcpu);
+ break;
case IRQ_PEND_PFAULT_DONE:
rc = __deliver_pfault_done(vcpu);
break;
@@ -1421,7 +1545,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL)
return -EINVAL;
- if (sclp.has_sigpif)
+ if (sclp.has_sigpif && !kvm_s390_pv_cpu_get_handle(vcpu))
return sca_inject_ext_call(vcpu, src_id);
if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
@@ -1681,9 +1805,6 @@ out:
return inti;
}
-#define SCCB_MASK 0xFFFFFFF8
-#define SCCB_EVENT_PENDING 0x3
-
static int __inject_service(struct kvm *kvm,
struct kvm_s390_interrupt_info *inti)
{
@@ -1692,6 +1813,11 @@ static int __inject_service(struct kvm *kvm,
kvm->stat.inject_service_signal++;
spin_lock(&fi->lock);
fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING;
+
+ /* We always allow events, track them separately from the sccb ints */
+ if (fi->srv_signal.ext_params & SCCB_EVENT_PENDING)
+ set_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
+
/*
* Early versions of the QEMU s390 bios will inject several
* service interrupts after another without handling a
@@ -1773,7 +1899,14 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
kvm->stat.inject_io++;
isc = int_word_to_isc(inti->io.io_int_word);
- if (gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) {
+ /*
+ * Do not make use of gisa in protected mode. We do not use the lock
+ * checking variant as this is just a performance optimization and we
+ * do not hold the lock here. This is ok as the code will pick
+ * interrupts from both "lists" for delivery.
+ */
+ if (!kvm_s390_pv_get_handle(kvm) &&
+ gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) {
VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc);
gisa_set_ipm_gisc(gi->origin, isc);
kfree(inti);
@@ -1834,7 +1967,8 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)
break;
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
if (!(type & KVM_S390_INT_IO_AI_MASK &&
- kvm->arch.gisa_int.origin))
+ kvm->arch.gisa_int.origin) ||
+ kvm_s390_pv_cpu_get_handle(dst_vcpu))
kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
break;
default:
@@ -2080,6 +2214,10 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm)
struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
int i;
+ mutex_lock(&kvm->lock);
+ if (!kvm_s390_pv_is_protected(kvm))
+ fi->masked_irqs = 0;
+ mutex_unlock(&kvm->lock);
spin_lock(&fi->lock);
fi->pending_irqs = 0;
memset(&fi->srv_signal, 0, sizeof(fi->srv_signal));
@@ -2146,7 +2284,8 @@ static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
n++;
}
}
- if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs)) {
+ if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs) ||
+ test_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs)) {
if (n == max_irqs) {
/* signal userspace to try again */
ret = -ENOMEM;
@@ -2327,9 +2466,6 @@ static int register_io_adapter(struct kvm_device *dev,
if (!adapter)
return -ENOMEM;
- INIT_LIST_HEAD(&adapter->maps);
- init_rwsem(&adapter->maps_lock);
- atomic_set(&adapter->nr_maps, 0);
adapter->id = adapter_info.id;
adapter->isc = adapter_info.isc;
adapter->maskable = adapter_info.maskable;
@@ -2354,87 +2490,12 @@ int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked)
return ret;
}
-static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
-{
- struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
- struct s390_map_info *map;
- int ret;
-
- if (!adapter || !addr)
- return -EINVAL;
-
- map = kzalloc(sizeof(*map), GFP_KERNEL);
- if (!map) {
- ret = -ENOMEM;
- goto out;
- }
- INIT_LIST_HEAD(&map->list);
- map->guest_addr = addr;
- map->addr = gmap_translate(kvm->arch.gmap, addr);
- if (map->addr == -EFAULT) {
- ret = -EFAULT;
- goto out;
- }
- ret = get_user_pages_fast(map->addr, 1, FOLL_WRITE, &map->page);
- if (ret < 0)
- goto out;
- BUG_ON(ret != 1);
- down_write(&adapter->maps_lock);
- if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) {
- list_add_tail(&map->list, &adapter->maps);
- ret = 0;
- } else {
- put_page(map->page);
- ret = -EINVAL;
- }
- up_write(&adapter->maps_lock);
-out:
- if (ret)
- kfree(map);
- return ret;
-}
-
-static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr)
-{
- struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
- struct s390_map_info *map, *tmp;
- int found = 0;
-
- if (!adapter || !addr)
- return -EINVAL;
-
- down_write(&adapter->maps_lock);
- list_for_each_entry_safe(map, tmp, &adapter->maps, list) {
- if (map->guest_addr == addr) {
- found = 1;
- atomic_dec(&adapter->nr_maps);
- list_del(&map->list);
- put_page(map->page);
- kfree(map);
- break;
- }
- }
- up_write(&adapter->maps_lock);
-
- return found ? 0 : -EINVAL;
-}
-
void kvm_s390_destroy_adapters(struct kvm *kvm)
{
int i;
- struct s390_map_info *map, *tmp;
- for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) {
- if (!kvm->arch.adapters[i])
- continue;
- list_for_each_entry_safe(map, tmp,
- &kvm->arch.adapters[i]->maps, list) {
- list_del(&map->list);
- put_page(map->page);
- kfree(map);
- }
+ for (i = 0; i < MAX_S390_IO_ADAPTERS; i++)
kfree(kvm->arch.adapters[i]);
- }
}
static int modify_io_adapter(struct kvm_device *dev,
@@ -2456,11 +2517,14 @@ static int modify_io_adapter(struct kvm_device *dev,
if (ret > 0)
ret = 0;
break;
+ /*
+ * The following operations are no longer needed and therefore no-ops.
+ * The gpa to hva translation is done when an IRQ route is set up. The
+ * set_irq code uses get_user_pages_remote() to do the actual write.
+ */
case KVM_S390_IO_ADAPTER_MAP:
- ret = kvm_s390_adapter_map(dev->kvm, req.id, req.addr);
- break;
case KVM_S390_IO_ADAPTER_UNMAP:
- ret = kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr);
+ ret = 0;
break;
default:
ret = -EINVAL;
@@ -2699,19 +2763,15 @@ static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit;
}
-static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter,
- u64 addr)
+static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
{
- struct s390_map_info *map;
+ struct page *page = NULL;
- if (!adapter)
- return NULL;
-
- list_for_each_entry(map, &adapter->maps, list) {
- if (map->guest_addr == addr)
- return map;
- }
- return NULL;
+ down_read(&kvm->mm->mmap_sem);
+ get_user_pages_remote(NULL, kvm->mm, uaddr, 1, FOLL_WRITE,
+ &page, NULL, NULL);
+ up_read(&kvm->mm->mmap_sem);
+ return page;
}
static int adapter_indicators_set(struct kvm *kvm,
@@ -2720,30 +2780,35 @@ static int adapter_indicators_set(struct kvm *kvm,
{
unsigned long bit;
int summary_set, idx;
- struct s390_map_info *info;
+ struct page *ind_page, *summary_page;
void *map;
- info = get_map_info(adapter, adapter_int->ind_addr);
- if (!info)
+ ind_page = get_map_page(kvm, adapter_int->ind_addr);
+ if (!ind_page)
return -1;
- map = page_address(info->page);
- bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap);
- set_bit(bit, map);
- idx = srcu_read_lock(&kvm->srcu);
- mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
- set_page_dirty_lock(info->page);
- info = get_map_info(adapter, adapter_int->summary_addr);
- if (!info) {
- srcu_read_unlock(&kvm->srcu, idx);
+ summary_page = get_map_page(kvm, adapter_int->summary_addr);
+ if (!summary_page) {
+ put_page(ind_page);
return -1;
}
- map = page_address(info->page);
- bit = get_ind_bit(info->addr, adapter_int->summary_offset,
- adapter->swap);
+
+ idx = srcu_read_lock(&kvm->srcu);
+ map = page_address(ind_page);
+ bit = get_ind_bit(adapter_int->ind_addr,
+ adapter_int->ind_offset, adapter->swap);
+ set_bit(bit, map);
+ mark_page_dirty(kvm, adapter_int->ind_addr >> PAGE_SHIFT);
+ set_page_dirty_lock(ind_page);
+ map = page_address(summary_page);
+ bit = get_ind_bit(adapter_int->summary_addr,
+ adapter_int->summary_offset, adapter->swap);
summary_set = test_and_set_bit(bit, map);
- mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
- set_page_dirty_lock(info->page);
+ mark_page_dirty(kvm, adapter_int->summary_addr >> PAGE_SHIFT);
+ set_page_dirty_lock(summary_page);
srcu_read_unlock(&kvm->srcu, idx);
+
+ put_page(ind_page);
+ put_page(summary_page);
return summary_set ? 0 : 1;
}
@@ -2765,9 +2830,7 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
adapter = get_io_adapter(kvm, e->adapter.adapter_id);
if (!adapter)
return -1;
- down_read(&adapter->maps_lock);
ret = adapter_indicators_set(kvm, adapter, &e->adapter);
- up_read(&adapter->maps_lock);
if ((ret > 0) && !adapter->masked) {
ret = kvm_s390_inject_airq(kvm, adapter);
if (ret == 0)
@@ -2818,23 +2881,27 @@ int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
- int ret;
+ u64 uaddr;
switch (ue->type) {
+ /* we store the userspace addresses instead of the guest addresses */
case KVM_IRQ_ROUTING_S390_ADAPTER:
e->set = set_adapter_int;
- e->adapter.summary_addr = ue->u.adapter.summary_addr;
- e->adapter.ind_addr = ue->u.adapter.ind_addr;
+ uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr);
+ if (uaddr == -EFAULT)
+ return -EFAULT;
+ e->adapter.summary_addr = uaddr;
+ uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr);
+ if (uaddr == -EFAULT)
+ return -EFAULT;
+ e->adapter.ind_addr = uaddr;
e->adapter.summary_offset = ue->u.adapter.summary_offset;
e->adapter.ind_offset = ue->u.adapter.ind_offset;
e->adapter.adapter_id = ue->u.adapter.adapter_id;
- ret = 0;
- break;
+ return 0;
default:
- ret = -EINVAL;
+ return -EINVAL;
}
-
- return ret;
}
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index c2e6d4ba4e23..19a81024fe16 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2,7 +2,7 @@
/*
* hosting IBM Z kernel virtual machines (s390x)
*
- * Copyright IBM Corp. 2008, 2018
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -44,6 +44,7 @@
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
+#include <asm/uv.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -184,6 +185,11 @@ static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
+/* if set to true, the GISA will be initialized and used if available */
+static bool use_gisa = true;
+module_param(use_gisa, bool, 0644);
+MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
+
/*
* For now we handle at most 16 double words as this is what the s390 base
* kernel handles and stores in the prefix page. If we ever need to go beyond
@@ -220,6 +226,7 @@ static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
+debug_info_t *kvm_s390_dbf_uv;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
@@ -228,13 +235,15 @@ int kvm_arch_hardware_enable(void)
return 0;
}
-int kvm_arch_check_processor_compat(void)
+int kvm_arch_check_processor_compat(void *opaque)
{
return 0;
}
+/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
unsigned long end);
+static int sca_switch_to_extended(struct kvm *kvm);
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
@@ -293,7 +302,7 @@ static struct notifier_block kvm_clock_notifier = {
.notifier_call = kvm_clock_sync,
};
-int kvm_arch_hardware_setup(void)
+int kvm_arch_hardware_setup(void *opaque)
{
gmap_notifier.notifier_call = kvm_gmap_notifier;
gmap_register_pte_notifier(&gmap_notifier);
@@ -460,7 +469,12 @@ int kvm_arch_init(void *opaque)
if (!kvm_s390_dbf)
return -ENOMEM;
- if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view))
+ kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
+ if (!kvm_s390_dbf_uv)
+ goto out;
+
+ if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
+ debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
goto out;
kvm_s390_cpu_feat_init();
@@ -487,6 +501,7 @@ void kvm_arch_exit(void)
{
kvm_s390_gib_destroy();
debug_unregister(kvm_s390_dbf);
+ debug_unregister(kvm_s390_dbf_uv);
}
/* Section: device related */
@@ -564,14 +579,16 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_BPB:
r = test_facility(82);
break;
+ case KVM_CAP_S390_PROTECTED:
+ r = is_prot_virt_host();
+ break;
default:
r = 0;
}
return r;
}
-static void kvm_s390_sync_dirty_log(struct kvm *kvm,
- struct kvm_memory_slot *memslot)
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
int i;
gfn_t cur_gfn, last_gfn;
@@ -612,9 +629,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
{
int r;
unsigned long n;
- struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int is_dirty = 0;
+ int is_dirty;
if (kvm_is_ucontrol(kvm))
return -EINVAL;
@@ -625,14 +641,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
if (log->slot >= KVM_USER_MEM_SLOTS)
goto out;
- slots = kvm_memslots(kvm);
- memslot = id_to_memslot(slots, log->slot);
- r = -ENOENT;
- if (!memslot->dirty_bitmap)
- goto out;
-
- kvm_s390_sync_dirty_log(kvm, memslot);
- r = kvm_get_dirty_log(kvm, log, &is_dirty);
+ r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
if (r)
goto out;
@@ -1993,6 +2002,9 @@ static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
struct kvm_memslots *slots = kvm_memslots(kvm);
struct kvm_memory_slot *ms;
+ if (unlikely(!slots->used_slots))
+ return 0;
+
cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
ms = gfn_to_memslot(kvm, cur_gfn);
args->count = 0;
@@ -2158,6 +2170,194 @@ out:
return r;
}
+static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
+{
+ struct kvm_vcpu *vcpu;
+ u16 rc, rrc;
+ int ret = 0;
+ int i;
+
+ /*
+ * We ignore failures and try to destroy as many CPUs as possible.
+ * At the same time we must not free the assigned resources when
+ * this fails, as the ultravisor has still access to that memory.
+ * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
+ * behind.
+ * We want to return the first failure rc and rrc, though.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ mutex_lock(&vcpu->mutex);
+ if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
+ *rcp = rc;
+ *rrcp = rrc;
+ ret = -EIO;
+ }
+ mutex_unlock(&vcpu->mutex);
+ }
+ return ret;
+}
+
+static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+ int i, r = 0;
+ u16 dummy;
+
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ mutex_lock(&vcpu->mutex);
+ r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
+ mutex_unlock(&vcpu->mutex);
+ if (r)
+ break;
+ }
+ if (r)
+ kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
+ return r;
+}
+
+static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
+{
+ int r = 0;
+ u16 dummy;
+ void __user *argp = (void __user *)cmd->data;
+
+ switch (cmd->cmd) {
+ case KVM_PV_ENABLE: {
+ r = -EINVAL;
+ if (kvm_s390_pv_is_protected(kvm))
+ break;
+
+ /*
+ * FMT 4 SIE needs esca. As we never switch back to bsca from
+ * esca, we need no cleanup in the error cases below
+ */
+ r = sca_switch_to_extended(kvm);
+ if (r)
+ break;
+
+ down_write(&current->mm->mmap_sem);
+ r = gmap_mark_unmergeable();
+ up_write(&current->mm->mmap_sem);
+ if (r)
+ break;
+
+ r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
+ if (r)
+ break;
+
+ r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
+ if (r)
+ kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
+
+ /* we need to block service interrupts from now on */
+ set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+ break;
+ }
+ case KVM_PV_DISABLE: {
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
+ /*
+ * If a CPU could not be destroyed, destroy VM will also fail.
+ * There is no point in trying to destroy it. Instead return
+ * the rc and rrc from the first CPU that failed destroying.
+ */
+ if (r)
+ break;
+ r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
+
+ /* no need to block service interrupts any more */
+ clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+ break;
+ }
+ case KVM_PV_SET_SEC_PARMS: {
+ struct kvm_s390_pv_sec_parm parms = {};
+ void *hdr;
+
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = -EFAULT;
+ if (copy_from_user(&parms, argp, sizeof(parms)))
+ break;
+
+ /* Currently restricted to 8KB */
+ r = -EINVAL;
+ if (parms.length > PAGE_SIZE * 2)
+ break;
+
+ r = -ENOMEM;
+ hdr = vmalloc(parms.length);
+ if (!hdr)
+ break;
+
+ r = -EFAULT;
+ if (!copy_from_user(hdr, (void __user *)parms.origin,
+ parms.length))
+ r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
+ &cmd->rc, &cmd->rrc);
+
+ vfree(hdr);
+ break;
+ }
+ case KVM_PV_UNPACK: {
+ struct kvm_s390_pv_unp unp = {};
+
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = -EFAULT;
+ if (copy_from_user(&unp, argp, sizeof(unp)))
+ break;
+
+ r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
+ &cmd->rc, &cmd->rrc);
+ break;
+ }
+ case KVM_PV_VERIFY: {
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
+ cmd->rrc);
+ break;
+ }
+ case KVM_PV_PREP_RESET: {
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
+ cmd->rc, cmd->rrc);
+ break;
+ }
+ case KVM_PV_UNSHARE_ALL: {
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
+ cmd->rc, cmd->rrc);
+ break;
+ }
+ default:
+ r = -ENOTTY;
+ }
+ return r;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -2255,6 +2455,33 @@ long kvm_arch_vm_ioctl(struct file *filp,
mutex_unlock(&kvm->slots_lock);
break;
}
+ case KVM_S390_PV_COMMAND: {
+ struct kvm_pv_cmd args;
+
+ /* protvirt means user sigp */
+ kvm->arch.user_cpu_state_ctrl = 1;
+ r = 0;
+ if (!is_prot_virt_host()) {
+ r = -EINVAL;
+ break;
+ }
+ if (copy_from_user(&args, argp, sizeof(args))) {
+ r = -EFAULT;
+ break;
+ }
+ if (args.flags) {
+ r = -EINVAL;
+ break;
+ }
+ mutex_lock(&kvm->lock);
+ r = kvm_s390_handle_pv(kvm, &args);
+ mutex_unlock(&kvm->lock);
+ if (copy_to_user(argp, &args, sizeof(args))) {
+ r = -EFAULT;
+ break;
+ }
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -2504,7 +2731,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.use_skf = sclp.has_skey;
spin_lock_init(&kvm->arch.start_stop_lock);
kvm_s390_vsie_init(kvm);
- kvm_s390_gisa_init(kvm);
+ if (use_gisa)
+ kvm_s390_gisa_init(kvm);
KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
return 0;
@@ -2518,6 +2746,8 @@ out_err:
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
+ u16 rc, rrc;
+
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
kvm_s390_clear_local_irqs(vcpu);
@@ -2530,6 +2760,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
if (vcpu->kvm->arch.use_cmma)
kvm_s390_vcpu_unsetup_cmma(vcpu);
+ /* We can not hold the vcpu mutex here, we are already dying */
+ if (kvm_s390_pv_cpu_get_handle(vcpu))
+ kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
free_page((unsigned long)(vcpu->arch.sie_block));
}
@@ -2551,10 +2784,20 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
+ u16 rc, rrc;
+
kvm_free_vcpus(kvm);
sca_dispose(kvm);
- debug_unregister(kvm->arch.dbf);
kvm_s390_gisa_destroy(kvm);
+ /*
+ * We are already at the end of life and kvm->lock is not taken.
+ * This is ok as the file descriptor is closed by now and nobody
+ * can mess with the pv state. To avoid lockdep_assert_held from
+ * complaining we do not use kvm_s390_pv_is_protected.
+ */
+ if (kvm_s390_pv_get_handle(kvm))
+ kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
+ debug_unregister(kvm->arch.dbf);
free_page((unsigned long)kvm->arch.sie_page2);
if (!kvm_is_ucontrol(kvm))
gmap_remove(kvm->arch.gmap);
@@ -2650,6 +2893,9 @@ static int sca_switch_to_extended(struct kvm *kvm)
unsigned int vcpu_idx;
u32 scaol, scaoh;
+ if (kvm->arch.use_esca)
+ return 0;
+
new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
if (!new_sca)
return -ENOMEM;
@@ -2901,6 +3147,7 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
{
int rc = 0;
+ u16 uvrc, uvrrc;
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
CPUSTAT_SM |
@@ -2968,6 +3215,14 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
kvm_s390_vcpu_crypto_setup(vcpu);
+ mutex_lock(&vcpu->kvm->lock);
+ if (kvm_s390_pv_is_protected(vcpu->kvm)) {
+ rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
+ if (rc)
+ kvm_s390_vcpu_unsetup_cmma(vcpu);
+ }
+ mutex_unlock(&vcpu->kvm->lock);
+
return rc;
}
@@ -3277,7 +3532,6 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
kvm_s390_set_prefix(vcpu, 0);
kvm_s390_set_cpu_timer(vcpu, 0);
vcpu->arch.sie_block->ckc = 0;
- vcpu->arch.sie_block->todpr = 0;
memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
@@ -3295,9 +3549,17 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
vcpu->run->s.regs.pp = 0;
vcpu->run->s.regs.gbea = 1;
vcpu->run->s.regs.fpc = 0;
- vcpu->arch.sie_block->gbea = 1;
- vcpu->arch.sie_block->pp = 0;
- vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
+ /*
+ * Do not reset these registers in the protected case, as some of
+ * them are overlayed and they are not accessible in this case
+ * anyway.
+ */
+ if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->gbea = 1;
+ vcpu->arch.sie_block->pp = 0;
+ vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
+ vcpu->arch.sie_block->todpr = 0;
+ }
}
static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
@@ -3487,14 +3749,20 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
switch (mp_state->mp_state) {
case KVM_MP_STATE_STOPPED:
- kvm_s390_vcpu_stop(vcpu);
+ rc = kvm_s390_vcpu_stop(vcpu);
break;
case KVM_MP_STATE_OPERATING:
- kvm_s390_vcpu_start(vcpu);
+ rc = kvm_s390_vcpu_start(vcpu);
break;
case KVM_MP_STATE_LOAD:
+ if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
+ rc = -ENXIO;
+ break;
+ }
+ rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
+ break;
case KVM_MP_STATE_CHECK_STOP:
- /* fall through - CHECK_STOP and LOAD are not supported yet */
+ fallthrough; /* CHECK_STOP and LOAD are not supported yet */
default:
rc = -ENXIO;
}
@@ -3844,9 +4112,11 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
return vcpu_post_run_fault_in_sie(vcpu);
}
+#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
int rc, exit_reason;
+ struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
/*
* We try to hold kvm->srcu during most of vcpu_run (except when run-
@@ -3868,8 +4138,28 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
guest_enter_irqoff();
__disable_cpu_timer_accounting(vcpu);
local_irq_enable();
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ memcpy(sie_page->pv_grregs,
+ vcpu->run->s.regs.gprs,
+ sizeof(sie_page->pv_grregs));
+ }
exit_reason = sie64a(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ memcpy(vcpu->run->s.regs.gprs,
+ sie_page->pv_grregs,
+ sizeof(sie_page->pv_grregs));
+ /*
+ * We're not allowed to inject interrupts on intercepts
+ * that leave the guest state in an "in-between" state
+ * where the next SIE entry will do a continuation.
+ * Fence interrupts in our "internal" PSW.
+ */
+ if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
+ vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
+ vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
+ }
+ }
local_irq_disable();
__enable_cpu_timer_accounting(vcpu);
guest_exit_irqoff();
@@ -3883,7 +4173,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
return rc;
}
-static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
struct runtime_instr_cb *riccb;
struct gs_cb *gscb;
@@ -3892,16 +4182,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
- if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
- kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
- if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
- memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
- /* some control register changes require a tlb flush */
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
- }
if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
- kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
- vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
@@ -3942,6 +4223,36 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
}
+ if (MACHINE_HAS_GS) {
+ preempt_disable();
+ __ctl_set_bit(2, 4);
+ if (current->thread.gs_cb) {
+ vcpu->arch.host_gscb = current->thread.gs_cb;
+ save_gs_cb(vcpu->arch.host_gscb);
+ }
+ if (vcpu->arch.gs_enabled) {
+ current->thread.gs_cb = (struct gs_cb *)
+ &vcpu->run->s.regs.gscb;
+ restore_gs_cb(current->thread.gs_cb);
+ }
+ preempt_enable();
+ }
+ /* SIE will load etoken directly from SDNX and therefore kvm_run */
+}
+
+static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
+ kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
+ memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
+ /* some control register changes require a tlb flush */
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ }
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
+ kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
+ vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
+ }
save_access_regs(vcpu->arch.host_acrs);
restore_access_regs(vcpu->run->s.regs.acrs);
/* save host (userspace) fprs/vrs */
@@ -3956,23 +4267,47 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (test_fp_ctl(current->thread.fpu.fpc))
/* User space provided an invalid FPC, let's clear it */
current->thread.fpu.fpc = 0;
+
+ /* Sync fmt2 only data */
+ if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
+ sync_regs_fmt2(vcpu, kvm_run);
+ } else {
+ /*
+ * In several places we have to modify our internal view to
+ * not do things that are disallowed by the ultravisor. For
+ * example we must not inject interrupts after specific exits
+ * (e.g. 112 prefix page not secure). We do this by turning
+ * off the machine check, external and I/O interrupt bits
+ * of our PSW copy. To avoid getting validity intercepts, we
+ * do only accept the condition code from userspace.
+ */
+ vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
+ vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
+ PSW_MASK_CC;
+ }
+
+ kvm_run->kvm_dirty_regs = 0;
+}
+
+static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
+ kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
+ kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
+ kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
if (MACHINE_HAS_GS) {
- preempt_disable();
__ctl_set_bit(2, 4);
- if (current->thread.gs_cb) {
- vcpu->arch.host_gscb = current->thread.gs_cb;
- save_gs_cb(vcpu->arch.host_gscb);
- }
- if (vcpu->arch.gs_enabled) {
- current->thread.gs_cb = (struct gs_cb *)
- &vcpu->run->s.regs.gscb;
- restore_gs_cb(current->thread.gs_cb);
- }
+ if (vcpu->arch.gs_enabled)
+ save_gs_cb(current->thread.gs_cb);
+ preempt_disable();
+ current->thread.gs_cb = vcpu->arch.host_gscb;
+ restore_gs_cb(vcpu->arch.host_gscb);
preempt_enable();
+ if (!vcpu->arch.host_gscb)
+ __ctl_clear_bit(2, 4);
+ vcpu->arch.host_gscb = NULL;
}
- /* SIE will load etoken directly from SDNX and therefore kvm_run */
-
- kvm_run->kvm_dirty_regs = 0;
+ /* SIE will save etoken directly into SDNX and therefore kvm_run */
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -3983,13 +4318,9 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
- kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
- kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
- kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
kvm_run->s.regs.pft = vcpu->arch.pfault_token;
kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
- kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
save_access_regs(vcpu->run->s.regs.acrs);
restore_access_regs(vcpu->arch.host_acrs);
/* Save guest register state */
@@ -3998,19 +4329,8 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* Restore will be done lazily at return */
current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
- if (MACHINE_HAS_GS) {
- __ctl_set_bit(2, 4);
- if (vcpu->arch.gs_enabled)
- save_gs_cb(current->thread.gs_cb);
- preempt_disable();
- current->thread.gs_cb = vcpu->arch.host_gscb;
- restore_gs_cb(vcpu->arch.host_gscb);
- preempt_enable();
- if (!vcpu->arch.host_gscb)
- __ctl_clear_bit(2, 4);
- vcpu->arch.host_gscb = NULL;
- }
- /* SIE will save etoken directly into SDNX and therefore kvm_run */
+ if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
+ store_regs_fmt2(vcpu, kvm_run);
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -4034,6 +4354,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_sigset_activate(vcpu);
+ /*
+ * no need to check the return value of vcpu_start as it can only have
+ * an error for protvirt, but protvirt means user cpu state
+ */
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
kvm_s390_vcpu_start(vcpu);
} else if (is_vcpu_stopped(vcpu)) {
@@ -4171,18 +4495,27 @@ static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
-void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
+int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
- int i, online_vcpus, started_vcpus = 0;
+ int i, online_vcpus, r = 0, started_vcpus = 0;
if (!is_vcpu_stopped(vcpu))
- return;
+ return 0;
trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
/* Only one cpu at a time may enter/leave the STOPPED state. */
spin_lock(&vcpu->kvm->arch.start_stop_lock);
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+ /* Let's tell the UV that we want to change into the operating state */
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
+ if (r) {
+ spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+ return r;
+ }
+ }
+
for (i = 0; i < online_vcpus; i++) {
if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
started_vcpus++;
@@ -4202,27 +4535,43 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
/*
+ * The real PSW might have changed due to a RESTART interpreted by the
+ * ultravisor. We block all interrupts and let the next sie exit
+ * refresh our view.
+ */
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
+ /*
* Another VCPU might have used IBS while we were offline.
* Let's play safe and flush the VCPU at startup.
*/
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
spin_unlock(&vcpu->kvm->arch.start_stop_lock);
- return;
+ return 0;
}
-void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
+int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
- int i, online_vcpus, started_vcpus = 0;
+ int i, online_vcpus, r = 0, started_vcpus = 0;
struct kvm_vcpu *started_vcpu = NULL;
if (is_vcpu_stopped(vcpu))
- return;
+ return 0;
trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
/* Only one cpu at a time may enter/leave the STOPPED state. */
spin_lock(&vcpu->kvm->arch.start_stop_lock);
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+ /* Let's tell the UV that we want to change into the stopped state */
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
+ if (r) {
+ spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+ return r;
+ }
+ }
+
/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
kvm_s390_clear_stop_irq(vcpu);
@@ -4245,7 +4594,7 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
}
spin_unlock(&vcpu->kvm->arch.start_stop_lock);
- return;
+ return 0;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
@@ -4272,12 +4621,40 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return r;
}
+static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
+ struct kvm_s390_mem_op *mop)
+{
+ void __user *uaddr = (void __user *)mop->buf;
+ int r = 0;
+
+ if (mop->flags || !mop->size)
+ return -EINVAL;
+ if (mop->size + mop->sida_offset < mop->size)
+ return -EINVAL;
+ if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
+ return -E2BIG;
+
+ switch (mop->op) {
+ case KVM_S390_MEMOP_SIDA_READ:
+ if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
+ mop->sida_offset), mop->size))
+ r = -EFAULT;
+
+ break;
+ case KVM_S390_MEMOP_SIDA_WRITE:
+ if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
+ mop->sida_offset), uaddr, mop->size))
+ r = -EFAULT;
+ break;
+ }
+ return r;
+}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
struct kvm_s390_mem_op *mop)
{
void __user *uaddr = (void __user *)mop->buf;
void *tmpbuf = NULL;
- int r, srcu_idx;
+ int r = 0;
const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
| KVM_S390_MEMOP_F_CHECK_ONLY;
@@ -4287,14 +4664,15 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
if (mop->size > MEM_OP_MAX_SIZE)
return -E2BIG;
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ return -EINVAL;
+
if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
tmpbuf = vmalloc(mop->size);
if (!tmpbuf)
return -ENOMEM;
}
- srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-
switch (mop->op) {
case KVM_S390_MEMOP_LOGICAL_READ:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
@@ -4320,12 +4698,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
}
r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
break;
- default:
- r = -EINVAL;
}
- srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
-
if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
@@ -4333,6 +4707,31 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
return r;
}
+static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
+ struct kvm_s390_mem_op *mop)
+{
+ int r, srcu_idx;
+
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ switch (mop->op) {
+ case KVM_S390_MEMOP_LOGICAL_READ:
+ case KVM_S390_MEMOP_LOGICAL_WRITE:
+ r = kvm_s390_guest_mem_op(vcpu, mop);
+ break;
+ case KVM_S390_MEMOP_SIDA_READ:
+ case KVM_S390_MEMOP_SIDA_WRITE:
+ /* we are locked against sida going away by the vcpu->mutex */
+ r = kvm_s390_guest_sida_op(vcpu, mop);
+ break;
+ default:
+ r = -EINVAL;
+ }
+
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+ return r;
+}
+
long kvm_arch_vcpu_async_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -4368,6 +4767,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
void __user *argp = (void __user *)arg;
int idx;
long r;
+ u16 rc, rrc;
vcpu_load(vcpu);
@@ -4389,18 +4789,40 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
case KVM_S390_CLEAR_RESET:
r = 0;
kvm_arch_vcpu_ioctl_clear_reset(vcpu);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+ UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
+ VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
+ rc, rrc);
+ }
break;
case KVM_S390_INITIAL_RESET:
r = 0;
kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+ UVC_CMD_CPU_RESET_INITIAL,
+ &rc, &rrc);
+ VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
+ rc, rrc);
+ }
break;
case KVM_S390_NORMAL_RESET:
r = 0;
kvm_arch_vcpu_ioctl_normal_reset(vcpu);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+ UVC_CMD_CPU_RESET, &rc, &rrc);
+ VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
+ rc, rrc);
+ }
break;
case KVM_SET_ONE_REG:
case KVM_GET_ONE_REG: {
struct kvm_one_reg reg;
+ r = -EINVAL;
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ break;
r = -EFAULT;
if (copy_from_user(&reg, argp, sizeof(reg)))
break;
@@ -4463,7 +4885,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_s390_mem_op mem_op;
if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
- r = kvm_s390_guest_mem_op(vcpu, &mem_op);
+ r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
else
r = -EFAULT;
break;
@@ -4523,12 +4945,6 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
- unsigned long npages)
-{
- return 0;
-}
-
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
@@ -4549,12 +4965,15 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
return -EINVAL;
+ /* When we are protected, we should not change the memory slots */
+ if (kvm_s390_pv_get_handle(kvm))
+ return -EINVAL;
return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
@@ -4570,7 +4989,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
old->npages * PAGE_SIZE);
if (rc)
break;
- /* FALLTHROUGH */
+ fallthrough;
case KVM_MR_CREATE:
rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
mem->guest_phys_addr, mem->memory_size);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 6d9448dbd052..79dcd647b378 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -2,7 +2,7 @@
/*
* definition for kvm on s390
*
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -15,6 +15,7 @@
#include <linux/hrtimer.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <linux/lockdep.h>
#include <asm/facility.h>
#include <asm/processor.h>
#include <asm/sclp.h>
@@ -25,6 +26,17 @@
#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
extern debug_info_t *kvm_s390_dbf;
+extern debug_info_t *kvm_s390_dbf_uv;
+
+#define KVM_UV_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
+do { \
+ debug_sprintf_event((d_kvm)->arch.dbf, d_loglevel, d_string "\n", \
+ d_args); \
+ debug_sprintf_event(kvm_s390_dbf_uv, d_loglevel, \
+ "%d: " d_string "\n", (d_kvm)->userspace_pid, \
+ d_args); \
+} while (0)
+
#define KVM_EVENT(d_loglevel, d_string, d_args...)\
do { \
debug_sprintf_event(kvm_s390_dbf, d_loglevel, d_string "\n", \
@@ -196,6 +208,39 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
return kvm->arch.user_cpu_state_ctrl != 0;
}
+/* implemented in pv.c */
+int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
+int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
+int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
+ u16 *rrc);
+int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
+ unsigned long tweak, u16 *rc, u16 *rrc);
+int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state);
+
+static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm)
+{
+ return kvm->arch.pv.handle;
+}
+
+static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.pv.handle;
+}
+
+static inline bool kvm_s390_pv_is_protected(struct kvm *kvm)
+{
+ lockdep_assert_held(&kvm->lock);
+ return !!kvm_s390_pv_get_handle(kvm);
+}
+
+static inline bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
+{
+ lockdep_assert_held(&vcpu->mutex);
+ return !!kvm_s390_pv_cpu_get_handle(vcpu);
+}
+
/* implemented in interrupt.c */
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
@@ -286,8 +331,8 @@ void kvm_s390_set_tod_clock(struct kvm *kvm,
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
-void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
-void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
+int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
+int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu);
bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index ed52ffa8d5d4..69a824f9ef0b 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -2,7 +2,7 @@
/*
* handling privileged instructions
*
- * Copyright IBM Corp. 2008, 2018
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -872,7 +872,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
- if (operand2 & 0xfff)
+ if (!kvm_s390_pv_cpu_is_protected(vcpu) && (operand2 & 0xfff))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
switch (fc) {
@@ -893,8 +893,13 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
handle_stsi_3_2_2(vcpu, (void *) mem);
break;
}
-
- rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ memcpy((void *)sida_origin(vcpu->arch.sie_block), (void *)mem,
+ PAGE_SIZE);
+ rc = 0;
+ } else {
+ rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
+ }
if (rc) {
rc = kvm_s390_inject_prog_cond(vcpu, rc);
goto out;
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
new file mode 100644
index 000000000000..63e330109b63
--- /dev/null
+++ b/arch/s390/kvm/pv.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hosting Protected Virtual Machines
+ *
+ * Copyright IBM Corp. 2019, 2020
+ * Author(s): Janosch Frank <frankja@linux.ibm.com>
+ */
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/pagemap.h>
+#include <linux/sched/signal.h>
+#include <asm/pgalloc.h>
+#include <asm/gmap.h>
+#include <asm/uv.h>
+#include <asm/mman.h>
+#include "kvm-s390.h"
+
+int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
+{
+ int cc = 0;
+
+ if (kvm_s390_pv_cpu_get_handle(vcpu)) {
+ cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+ UVC_CMD_DESTROY_SEC_CPU, rc, rrc);
+
+ KVM_UV_EVENT(vcpu->kvm, 3,
+ "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
+ vcpu->vcpu_id, *rc, *rrc);
+ WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x",
+ *rc, *rrc);
+ }
+ /* Intended memory leak for something that should never happen. */
+ if (!cc)
+ free_pages(vcpu->arch.pv.stor_base,
+ get_order(uv_info.guest_cpu_stor_len));
+
+ free_page(sida_origin(vcpu->arch.sie_block));
+ vcpu->arch.sie_block->pv_handle_cpu = 0;
+ vcpu->arch.sie_block->pv_handle_config = 0;
+ memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
+ vcpu->arch.sie_block->sdf = 0;
+ /*
+ * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
+ * Use the reset value of gbea to avoid leaking the kernel pointer of
+ * the just freed sida.
+ */
+ vcpu->arch.sie_block->gbea = 1;
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+
+ return cc ? EIO : 0;
+}
+
+int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_csc uvcb = {
+ .header.cmd = UVC_CMD_CREATE_SEC_CPU,
+ .header.len = sizeof(uvcb),
+ };
+ int cc;
+
+ if (kvm_s390_pv_cpu_get_handle(vcpu))
+ return -EINVAL;
+
+ vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL,
+ get_order(uv_info.guest_cpu_stor_len));
+ if (!vcpu->arch.pv.stor_base)
+ return -ENOMEM;
+
+ /* Input */
+ uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
+ uvcb.num = vcpu->arch.sie_block->icpua;
+ uvcb.state_origin = (u64)vcpu->arch.sie_block;
+ uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;
+
+ /* Alloc Secure Instruction Data Area Designation */
+ vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL | __GFP_ZERO);
+ if (!vcpu->arch.sie_block->sidad) {
+ free_pages(vcpu->arch.pv.stor_base,
+ get_order(uv_info.guest_cpu_stor_len));
+ return -ENOMEM;
+ }
+
+ cc = uv_call(0, (u64)&uvcb);
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ KVM_UV_EVENT(vcpu->kvm, 3,
+ "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
+ vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
+ uvcb.header.rrc);
+
+ if (cc) {
+ u16 dummy;
+
+ kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
+ return -EIO;
+ }
+
+ /* Output */
+ vcpu->arch.pv.handle = uvcb.cpu_handle;
+ vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
+ vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
+ vcpu->arch.sie_block->sdf = 2;
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ return 0;
+}
+
+/* only free resources when the destroy was successful */
+static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
+{
+ vfree(kvm->arch.pv.stor_var);
+ free_pages(kvm->arch.pv.stor_base,
+ get_order(uv_info.guest_base_stor_len));
+ memset(&kvm->arch.pv, 0, sizeof(kvm->arch.pv));
+}
+
+static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
+{
+ unsigned long base = uv_info.guest_base_stor_len;
+ unsigned long virt = uv_info.guest_virt_var_stor_len;
+ unsigned long npages = 0, vlen = 0;
+ struct kvm_memory_slot *memslot;
+
+ kvm->arch.pv.stor_var = NULL;
+ kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL, get_order(base));
+ if (!kvm->arch.pv.stor_base)
+ return -ENOMEM;
+
+ /*
+ * Calculate current guest storage for allocation of the
+ * variable storage, which is based on the length in MB.
+ *
+ * Slots are sorted by GFN
+ */
+ mutex_lock(&kvm->slots_lock);
+ memslot = kvm_memslots(kvm)->memslots;
+ npages = memslot->base_gfn + memslot->npages;
+ mutex_unlock(&kvm->slots_lock);
+
+ kvm->arch.pv.guest_len = npages * PAGE_SIZE;
+
+ /* Allocate variable storage */
+ vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
+ vlen += uv_info.guest_virt_base_stor_len;
+ kvm->arch.pv.stor_var = vzalloc(vlen);
+ if (!kvm->arch.pv.stor_var)
+ goto out_err;
+ return 0;
+
+out_err:
+ kvm_s390_pv_dealloc_vm(kvm);
+ return -ENOMEM;
+}
+
+/* this should not fail, but if it does, we must not free the donated memory */
+int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+ int cc;
+
+ /* make all pages accessible before destroying the guest */
+ s390_reset_acc(kvm->mm);
+
+ cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
+ WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
+ atomic_set(&kvm->mm->context.is_protected, 0);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
+ WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);
+ /* Inteded memory leak on "impossible" error */
+ if (!cc)
+ kvm_s390_pv_dealloc_vm(kvm);
+ return cc ? -EIO : 0;
+}
+
+int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_cgc uvcb = {
+ .header.cmd = UVC_CMD_CREATE_SEC_CONF,
+ .header.len = sizeof(uvcb)
+ };
+ int cc, ret;
+ u16 dummy;
+
+ ret = kvm_s390_pv_alloc_vm(kvm);
+ if (ret)
+ return ret;
+
+ /* Inputs */
+ uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
+ uvcb.guest_stor_len = kvm->arch.pv.guest_len;
+ uvcb.guest_asce = kvm->arch.gmap->asce;
+ uvcb.guest_sca = (unsigned long)kvm->arch.sca;
+ uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base;
+ uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
+
+ cc = uv_call(0, (u64)&uvcb);
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
+ uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc);
+
+ /* Outputs */
+ kvm->arch.pv.handle = uvcb.guest_handle;
+
+ if (cc) {
+ if (uvcb.header.rc & UVC_RC_NEED_DESTROY)
+ kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
+ else
+ kvm_s390_pv_dealloc_vm(kvm);
+ return -EIO;
+ }
+ kvm->arch.gmap->guest_handle = uvcb.guest_handle;
+ atomic_set(&kvm->mm->context.is_protected, 1);
+ return 0;
+}
+
+int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
+ u16 *rrc)
+{
+ struct uv_cb_ssc uvcb = {
+ .header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
+ .header.len = sizeof(uvcb),
+ .sec_header_origin = (u64)hdr,
+ .sec_header_len = length,
+ .guest_handle = kvm_s390_pv_get_handle(kvm),
+ };
+ int cc = uv_call(0, (u64)&uvcb);
+
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
+ *rc, *rrc);
+ return cc ? -EINVAL : 0;
+}
+
+static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
+ u64 offset, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_unp uvcb = {
+ .header.cmd = UVC_CMD_UNPACK_IMG,
+ .header.len = sizeof(uvcb),
+ .guest_handle = kvm_s390_pv_get_handle(kvm),
+ .gaddr = addr,
+ .tweak[0] = tweak,
+ .tweak[1] = offset,
+ };
+ int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
+
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+
+ if (ret && ret != -EAGAIN)
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
+ uvcb.gaddr, *rc, *rrc);
+ return ret;
+}
+
+int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
+ unsigned long tweak, u16 *rc, u16 *rrc)
+{
+ u64 offset = 0;
+ int ret = 0;
+
+ if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
+ return -EINVAL;
+
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
+ addr, size);
+
+ while (offset < size) {
+ ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
+ if (ret == -EAGAIN) {
+ cond_resched();
+ if (fatal_signal_pending(current))
+ break;
+ continue;
+ }
+ if (ret)
+ break;
+ addr += PAGE_SIZE;
+ offset += PAGE_SIZE;
+ }
+ if (!ret)
+ KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
+ return ret;
+}
+
+int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
+{
+ struct uv_cb_cpu_set_state uvcb = {
+ .header.cmd = UVC_CMD_CPU_SET_STATE,
+ .header.len = sizeof(uvcb),
+ .cpu_handle = kvm_s390_pv_cpu_get_handle(vcpu),
+ .state = state,
+ };
+ int cc;
+
+ cc = uv_call(0, (u64)&uvcb);
+ KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
+ vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
+ if (cc)
+ return -EINVAL;
+ return 0;
+}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index aeccdb30899a..09bf7f2121ac 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -38,6 +38,7 @@
#include <asm/irq.h>
#include <asm/mmu_context.h>
#include <asm/facility.h>
+#include <asm/uv.h>
#include "../kernel/entry.h"
#define __FAIL_ADDR_MASK -4096L
@@ -812,3 +813,80 @@ out_extint:
early_initcall(pfault_irq_init);
#endif /* CONFIG_PFAULT */
+
+#if IS_ENABLED(CONFIG_PGSTE)
+void do_secure_storage_access(struct pt_regs *regs)
+{
+ unsigned long addr = regs->int_parm_long & __FAIL_ADDR_MASK;
+ struct vm_area_struct *vma;
+ struct mm_struct *mm;
+ struct page *page;
+ int rc;
+
+ switch (get_fault_type(regs)) {
+ case USER_FAULT:
+ mm = current->mm;
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, addr);
+ if (!vma) {
+ up_read(&mm->mmap_sem);
+ do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ break;
+ }
+ page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET);
+ if (IS_ERR_OR_NULL(page)) {
+ up_read(&mm->mmap_sem);
+ break;
+ }
+ if (arch_make_page_accessible(page))
+ send_sig(SIGSEGV, current, 0);
+ put_page(page);
+ up_read(&mm->mmap_sem);
+ break;
+ case KERNEL_FAULT:
+ page = phys_to_page(addr);
+ if (unlikely(!try_get_page(page)))
+ break;
+ rc = arch_make_page_accessible(page);
+ put_page(page);
+ if (rc)
+ BUG();
+ break;
+ case VDSO_FAULT:
+ /* fallthrough */
+ case GMAP_FAULT:
+ /* fallthrough */
+ default:
+ do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ WARN_ON_ONCE(1);
+ }
+}
+NOKPROBE_SYMBOL(do_secure_storage_access);
+
+void do_non_secure_storage_access(struct pt_regs *regs)
+{
+ unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK;
+ struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+
+ if (get_fault_type(regs) != GMAP_FAULT) {
+ do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ if (gmap_convert_to_secure(gmap, gaddr) == -EINVAL)
+ send_sig(SIGSEGV, current, 0);
+}
+NOKPROBE_SYMBOL(do_non_secure_storage_access);
+
+#else
+void do_secure_storage_access(struct pt_regs *regs)
+{
+ default_trap_handler(regs);
+}
+
+void do_non_secure_storage_access(struct pt_regs *regs)
+{
+ default_trap_handler(regs);
+}
+#endif
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index edcdca97e85e..2fbece47ef6f 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -804,7 +804,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
if (*table & _REGION_ENTRY_INVALID)
return NULL;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* Fallthrough */
+ fallthrough;
case _ASCE_TYPE_REGION2:
table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
if (level == 3)
@@ -812,7 +812,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
if (*table & _REGION_ENTRY_INVALID)
return NULL;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* Fallthrough */
+ fallthrough;
case _ASCE_TYPE_REGION3:
table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
if (level == 2)
@@ -820,7 +820,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
if (*table & _REGION_ENTRY_INVALID)
return NULL;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* Fallthrough */
+ fallthrough;
case _ASCE_TYPE_SEGMENT:
table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
if (level == 1)
@@ -2548,6 +2548,23 @@ int s390_enable_sie(void)
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
+int gmap_mark_unmergeable(void)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int ret;
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
+ MADV_UNMERGEABLE, &vma->vm_flags);
+ if (ret)
+ return ret;
+ }
+ mm->def_flags &= ~VM_MERGEABLE;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gmap_mark_unmergeable);
+
/*
* Enable storage key handling from now on and initialize the storage
* keys with the default key.
@@ -2593,7 +2610,6 @@ static const struct mm_walk_ops enable_skey_walk_ops = {
int s390_enable_skey(void)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
int rc = 0;
down_write(&mm->mmap_sem);
@@ -2601,16 +2617,11 @@ int s390_enable_skey(void)
goto out_up;
mm->context.uses_skeys = 1;
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
- MADV_UNMERGEABLE, &vma->vm_flags)) {
- mm->context.uses_skeys = 0;
- rc = -ENOMEM;
- goto out_up;
- }
+ rc = gmap_mark_unmergeable();
+ if (rc) {
+ mm->context.uses_skeys = 0;
+ goto out_up;
}
- mm->def_flags &= ~VM_MERGEABLE;
-
walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);
out_up:
@@ -2640,3 +2651,38 @@ void s390_reset_cmma(struct mm_struct *mm)
up_write(&mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);
+
+/*
+ * make inaccessible pages accessible again
+ */
+static int __s390_reset_acc(pte_t *ptep, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pte_t pte = READ_ONCE(*ptep);
+
+ if (pte_present(pte))
+ WARN_ON_ONCE(uv_convert_from_secure(pte_val(pte) & PAGE_MASK));
+ return 0;
+}
+
+static const struct mm_walk_ops reset_acc_walk_ops = {
+ .pte_entry = __s390_reset_acc,
+};
+
+#include <linux/sched/mm.h>
+void s390_reset_acc(struct mm_struct *mm)
+{
+ /*
+ * we might be called during
+ * reset: we walk the pages and clear
+ * close of all kvm file descriptors: we walk the pages and clear
+ * exit of process on fd closure: vma already gone, do nothing
+ */
+ if (!mmget_not_zero(mm))
+ return;
+ down_read(&mm->mmap_sem);
+ walk_page_range(mm, 0, TASK_SIZE, &reset_acc_walk_ops, NULL);
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+}
+EXPORT_SYMBOL_GPL(s390_reset_acc);