Merge tag 'kvm-s390-next-6.19-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD

- SCA rework
- VIRT_XFER_TO_GUEST_WORK support
- Operation exception forwarding support
- Cleanups
Commit e0c26d47de by Paolo Bonzini, 2025-12-02 18:58:47 +01:00
15 changed files with 271 additions and 271 deletions

Documentation/virt/kvm/api.rst

@@ -7855,7 +7855,7 @@ where 0xff represents CPUs 0-7 in cluster 0.
:Architectures: s390
:Parameters: none
With this capability enabled, all illegal instructions 0x0000 (2 bytes) will
With this capability enabled, the illegal instruction 0x0000 (2 bytes) will
be intercepted and forwarded to user space. User space can use this
mechanism e.g. to realize 2-byte software breakpoints. The kernel will
not inject an operation exception for these instructions, user space has
@@ -8727,7 +8727,7 @@ given VM.
When this capability is enabled, KVM resets the VCPU when setting
MP_STATE_INIT_RECEIVED through IOCTL. The original MP_STATE is preserved.
7.43 KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED
7.44 KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED
-------------------------------------------
:Architectures: arm64
@@ -8750,6 +8750,21 @@ When this capability is enabled, KVM may exit to userspace for SEAs taken to
EL2 resulting from a guest access. See ``KVM_EXIT_ARM_SEA`` for more
information.
7.46 KVM_CAP_S390_USER_OPEREXEC
-------------------------------
:Architectures: s390
:Parameters: none
When this capability is enabled, KVM forwards all operation exceptions
that it doesn't handle itself to user space. This also includes the
0x0000 instructions managed by KVM_CAP_S390_USER_INSTR0. This is
helpful if user space wants to emulate instructions which are not
(yet) implemented in hardware.
This capability can be enabled dynamically even if VCPUs were already
created and are running.
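
A minimal user-space sketch of enabling the capability (``vm_fd`` and the
usual KVM setup are assumed; error handling omitted)::

  struct kvm_enable_cap cap = {
          .cap = KVM_CAP_S390_USER_OPEREXEC,
  };

  ioctl(vm_fd, KVM_ENABLE_CAP, &cap);

Forwarded operation exceptions then surface as ``KVM_EXIT_S390_SIEIC`` exits;
a dispatch sketch follows the intercept.c hunk below.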
8. Other capabilities.
======================

arch/s390/include/asm/kvm_host.h

@@ -146,6 +146,7 @@ struct kvm_vcpu_stat {
u64 instruction_diagnose_500;
u64 instruction_diagnose_other;
u64 pfault_sync;
u64 signal_exits;
};
#define PGM_OPERATION 0x01
@@ -631,10 +632,8 @@ struct kvm_s390_pv {
struct mmu_notifier mmu_notifier;
};
struct kvm_arch{
void *sca;
int use_esca;
rwlock_t sca_lock;
struct kvm_arch {
struct esca_block *sca;
debug_info_t *dbf;
struct kvm_s390_float_interrupt float_int;
struct kvm_device *flic;
@@ -650,6 +649,7 @@ struct kvm_arch{
int user_sigp;
int user_stsi;
int user_instr0;
int user_operexec;
struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
wait_queue_head_t ipte_wq;
int ipte_lock_count;

arch/s390/include/asm/stacktrace.h

@@ -66,6 +66,7 @@ struct stack_frame {
unsigned long sie_flags;
unsigned long sie_control_block_phys;
unsigned long sie_guest_asce;
unsigned long sie_irq;
};
};
unsigned long gprs[10];

arch/s390/kernel/asm-offsets.c

@@ -64,6 +64,7 @@ int main(void)
OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags);
OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys);
OFFSET(__SF_SIE_GUEST_ASCE, stack_frame, sie_guest_asce);
OFFSET(__SF_SIE_IRQ, stack_frame, sie_irq);
DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame));
BLANK();
OFFSET(__SFUSER_BACKCHAIN, stack_frame_user, back_chain);

arch/s390/kernel/entry.S

@@ -189,6 +189,7 @@ SYM_FUNC_START(__sie64a)
mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags
lmg %r0,%r13,0(%r4) # load guest gprs 0-13
mvi __TI_sie(%r14),1
stosm __SF_SIE_IRQ(%r15),0x03 # enable interrupts
lctlg %c1,%c1,__SF_SIE_GUEST_ASCE(%r15) # load primary asce
lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
@@ -212,6 +213,7 @@ SYM_FUNC_START(__sie64a)
lg %r14,__LC_CURRENT(%r14)
mvi __TI_sie(%r14),0
SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL)
stnsm __SF_SIE_IRQ(%r15),0xfc # disable interrupts
lg %r14,__SF_SIE_SAVEAREA(%r15) # load guest register save area
stmg %r0,%r13,0(%r14) # save guest gprs 0-13
xgr %r0,%r0 # clear guest registers to
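
The stosm/stnsm pair brackets guest execution with interrupt enablement at
the lowest level, replacing the arch_local_irq_enable()/arch_local_irq_disable()
calls that kvm_s390_enter_exit_sie() used to wrap around sie64a() (their
removal appears in the kvm-s390.c hunks below). Roughly, in C terms:

	/*
	 * Sketch only; the real code is the stosm/stnsm pair above, which
	 * also stores the old system mask in the new __SF_SIE_IRQ slot.
	 */
	arch_local_irq_enable();	/* stosm 0x03: set PSW I/O + ext bits */
	ret = sie64a(scb, gprs, gasce);	/* guest runs with host IRQs enabled */
	arch_local_irq_disable();	/* stnsm 0xfc: mask them again */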

arch/s390/kvm/Kconfig

@@ -29,6 +29,7 @@ config KVM
select HAVE_KVM_NO_POLL
select KVM_VFIO
select MMU_NOTIFIER
select VIRT_XFER_TO_GUEST_WORK
help
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work

arch/s390/kvm/gaccess.c

@@ -109,14 +109,9 @@ struct aste {
int ipte_lock_held(struct kvm *kvm)
{
if (sclp.has_siif) {
int rc;
if (sclp.has_siif)
return kvm->arch.sca->ipte_control.kh != 0;
read_lock(&kvm->arch.sca_lock);
rc = kvm_s390_get_ipte_control(kvm)->kh != 0;
read_unlock(&kvm->arch.sca_lock);
return rc;
}
return kvm->arch.ipte_lock_count != 0;
}
@@ -129,19 +124,16 @@ static void ipte_lock_simple(struct kvm *kvm)
if (kvm->arch.ipte_lock_count > 1)
goto out;
retry:
read_lock(&kvm->arch.sca_lock);
ic = kvm_s390_get_ipte_control(kvm);
ic = &kvm->arch.sca->ipte_control;
old = READ_ONCE(*ic);
do {
if (old.k) {
read_unlock(&kvm->arch.sca_lock);
cond_resched();
goto retry;
}
new = old;
new.k = 1;
} while (!try_cmpxchg(&ic->val, &old.val, new.val));
read_unlock(&kvm->arch.sca_lock);
out:
mutex_unlock(&kvm->arch.ipte_mutex);
}
@@ -154,14 +146,12 @@ static void ipte_unlock_simple(struct kvm *kvm)
kvm->arch.ipte_lock_count--;
if (kvm->arch.ipte_lock_count)
goto out;
read_lock(&kvm->arch.sca_lock);
ic = kvm_s390_get_ipte_control(kvm);
ic = &kvm->arch.sca->ipte_control;
old = READ_ONCE(*ic);
do {
new = old;
new.k = 0;
} while (!try_cmpxchg(&ic->val, &old.val, new.val));
read_unlock(&kvm->arch.sca_lock);
wake_up(&kvm->arch.ipte_wq);
out:
mutex_unlock(&kvm->arch.ipte_mutex);
@@ -172,12 +162,10 @@ static void ipte_lock_siif(struct kvm *kvm)
union ipte_control old, new, *ic;
retry:
read_lock(&kvm->arch.sca_lock);
ic = kvm_s390_get_ipte_control(kvm);
ic = &kvm->arch.sca->ipte_control;
old = READ_ONCE(*ic);
do {
if (old.kg) {
read_unlock(&kvm->arch.sca_lock);
cond_resched();
goto retry;
}
@@ -185,15 +173,13 @@ static void ipte_lock_siif(struct kvm *kvm)
new.k = 1;
new.kh++;
} while (!try_cmpxchg(&ic->val, &old.val, new.val));
read_unlock(&kvm->arch.sca_lock);
}
static void ipte_unlock_siif(struct kvm *kvm)
{
union ipte_control old, new, *ic;
read_lock(&kvm->arch.sca_lock);
ic = kvm_s390_get_ipte_control(kvm);
ic = &kvm->arch.sca->ipte_control;
old = READ_ONCE(*ic);
do {
new = old;
@@ -201,7 +187,6 @@ static void ipte_unlock_siif(struct kvm *kvm)
if (!new.kh)
new.k = 0;
} while (!try_cmpxchg(&ic->val, &old.val, new.val));
read_unlock(&kvm->arch.sca_lock);
if (!new.kh)
wake_up(&kvm->arch.ipte_wq);
}
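
With the VM's SCA now being a single, never-reallocated esca_block, the
sca_lock reader sections are gone and the IPTE lock bits are managed with
bare try_cmpxchg() retry loops. A minimal sketch of the pattern, using a
simplified control word rather than the real union ipte_control layout:

	union demo_ctl {
		struct {
			unsigned long k : 1;	/* lock bit */
			unsigned long   : 63;
		};
		unsigned long val;
	};

	static void demo_lock(union demo_ctl *ic)
	{
		union demo_ctl old, new;

		old = READ_ONCE(*ic);
		do {
			while (old.k) {		/* held: yield, then re-read */
				cond_resched();
				old = READ_ONCE(*ic);
			}
			new = old;
			new.k = 1;
			/* on failure, try_cmpxchg() refreshes 'old' */
		} while (!try_cmpxchg(&ic->val, &old.val, new.val));
	}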

arch/s390/kvm/intercept.c

@@ -471,6 +471,9 @@ static int handle_operexc(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->ipa == 0xb256)
return handle_sthyi(vcpu);
if (vcpu->kvm->arch.user_operexec)
return -EOPNOTSUPP;
if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0)
return -EOPNOTSUPP;
rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &newpsw, sizeof(psw_t));
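
handle_operexc() checks user_operexec before the narrower user_instr0, so
once the capability is on, every operation exception KVM does not handle
itself is pushed out via KVM_EXIT_S390_SIEIC with icptcode ICPT_OPEREXC.
A hedged sketch of the matching user-space dispatch ('run' is the mmap'ed
kvm_run; the handler names are hypothetical):

	switch (run->s390_sieic.ipa) {
	case 0x0000:	/* 2-byte software breakpoint */
		handle_sw_breakpoint(vcpu);	/* hypothetical helper */
		break;
	default:	/* instruction not (yet) implemented in hardware */
		emulate_insn(vcpu, run->s390_sieic.ipa, run->s390_sieic.ipb);
		break;
	}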

arch/s390/kvm/interrupt.c

@@ -45,70 +45,34 @@ static struct kvm_s390_gib *gib;
/* handle external calls via sigp interpretation facility */
static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id)
{
int c, scn;
struct esca_block *sca = vcpu->kvm->arch.sca;
union esca_sigp_ctrl sigp_ctrl = sca->cpu[vcpu->vcpu_id].sigp_ctrl;
if (!kvm_s390_test_cpuflags(vcpu, CPUSTAT_ECALL_PEND))
return 0;
BUG_ON(!kvm_s390_use_sca_entries());
read_lock(&vcpu->kvm->arch.sca_lock);
if (vcpu->kvm->arch.use_esca) {
struct esca_block *sca = vcpu->kvm->arch.sca;
union esca_sigp_ctrl sigp_ctrl =
sca->cpu[vcpu->vcpu_id].sigp_ctrl;
c = sigp_ctrl.c;
scn = sigp_ctrl.scn;
} else {
struct bsca_block *sca = vcpu->kvm->arch.sca;
union bsca_sigp_ctrl sigp_ctrl =
sca->cpu[vcpu->vcpu_id].sigp_ctrl;
c = sigp_ctrl.c;
scn = sigp_ctrl.scn;
}
read_unlock(&vcpu->kvm->arch.sca_lock);
if (src_id)
*src_id = scn;
*src_id = sigp_ctrl.scn;
return c;
return sigp_ctrl.c;
}
static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
{
struct esca_block *sca = vcpu->kvm->arch.sca;
union esca_sigp_ctrl *sigp_ctrl = &sca->cpu[vcpu->vcpu_id].sigp_ctrl;
union esca_sigp_ctrl old_val, new_val = {.scn = src_id, .c = 1};
int expect, rc;
BUG_ON(!kvm_s390_use_sca_entries());
read_lock(&vcpu->kvm->arch.sca_lock);
if (vcpu->kvm->arch.use_esca) {
struct esca_block *sca = vcpu->kvm->arch.sca;
union esca_sigp_ctrl *sigp_ctrl =
&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
union esca_sigp_ctrl new_val = {0}, old_val;
old_val = READ_ONCE(*sigp_ctrl);
new_val.scn = src_id;
new_val.c = 1;
old_val.c = 0;
old_val = READ_ONCE(*sigp_ctrl);
old_val.c = 0;
expect = old_val.value;
rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
} else {
struct bsca_block *sca = vcpu->kvm->arch.sca;
union bsca_sigp_ctrl *sigp_ctrl =
&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
union bsca_sigp_ctrl new_val = {0}, old_val;
old_val = READ_ONCE(*sigp_ctrl);
new_val.scn = src_id;
new_val.c = 1;
old_val.c = 0;
expect = old_val.value;
rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
}
read_unlock(&vcpu->kvm->arch.sca_lock);
expect = old_val.value;
rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
if (rc != expect) {
/* another external call is pending */
@@ -120,24 +84,14 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
{
struct esca_block *sca = vcpu->kvm->arch.sca;
union esca_sigp_ctrl *sigp_ctrl = &sca->cpu[vcpu->vcpu_id].sigp_ctrl;
if (!kvm_s390_use_sca_entries())
return;
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
read_lock(&vcpu->kvm->arch.sca_lock);
if (vcpu->kvm->arch.use_esca) {
struct esca_block *sca = vcpu->kvm->arch.sca;
union esca_sigp_ctrl *sigp_ctrl =
&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
WRITE_ONCE(sigp_ctrl->value, 0);
} else {
struct bsca_block *sca = vcpu->kvm->arch.sca;
union bsca_sigp_ctrl *sigp_ctrl =
&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
WRITE_ONCE(sigp_ctrl->value, 0);
}
read_unlock(&vcpu->kvm->arch.sca_lock);
WRITE_ONCE(sigp_ctrl->value, 0);
}
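
Losing the bsca/esca split also removes the sca_lock around the SIGP
control word; correctness rests on the cmpxchg() alone, which can only
succeed when no external call is pending because the expected value has
the c bit forced to zero. A sketch with a simplified control word, not
the real esca_sigp_ctrl layout:

	union demo_sigp {
		struct {
			unsigned short c : 1;	 /* ext call pending */
			unsigned short scn : 15; /* source cpu number */
		};
		unsigned short value;
	};

	static int demo_inject(union demo_sigp *ctrl, int src_id)
	{
		union demo_sigp old, new = { .scn = src_id, .c = 1 };

		old = READ_ONCE(*ctrl);
		old.c = 0;	/* expect success only if nothing is pending */
		if (cmpxchg(&ctrl->value, old.value, new.value) != old.value)
			return -EBUSY;	/* another call is already pending */
		return 0;
	}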
int psw_extint_disabled(struct kvm_vcpu *vcpu)
@@ -1224,7 +1178,7 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
if (!sclp.has_sigpif)
if (!kvm_s390_use_sca_entries())
return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
return sca_ext_call_pending(vcpu, NULL);
@@ -1549,7 +1503,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL)
return -EINVAL;
if (sclp.has_sigpif && !kvm_s390_pv_cpu_get_handle(vcpu))
if (kvm_s390_use_sca_entries() && !kvm_s390_pv_cpu_get_handle(vcpu))
return sca_inject_ext_call(vcpu, src_id);
if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))

arch/s390/kvm/kvm-s390.c

@@ -14,6 +14,7 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/compiler.h>
#include <linux/entry-virt.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
@@ -185,7 +186,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
STATS_DESC_COUNTER(VCPU, pfault_sync)
STATS_DESC_COUNTER(VCPU, pfault_sync),
STATS_DESC_COUNTER(VCPU, signal_exits)
};
const struct kvm_stats_header kvm_vcpu_stats_header = {
@@ -272,7 +274,6 @@ debug_info_t *kvm_s390_dbf_uv;
/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
@@ -607,6 +608,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_S390_DIAG318:
case KVM_CAP_IRQFD_RESAMPLE:
case KVM_CAP_S390_USER_OPEREXEC:
r = 1;
break;
case KVM_CAP_SET_GUEST_DEBUG2:
@@ -632,11 +634,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_NR_VCPUS:
case KVM_CAP_MAX_VCPUS:
case KVM_CAP_MAX_VCPU_ID:
r = KVM_S390_BSCA_CPU_SLOTS;
/*
* Return the same value for KVM_CAP_MAX_VCPUS and
* KVM_CAP_MAX_VCPU_ID to conform with the KVM API.
*/
r = KVM_S390_ESCA_CPU_SLOTS;
if (!kvm_s390_use_sca_entries())
r = KVM_MAX_VCPUS;
else if (sclp.has_esca && sclp.has_64bscao)
r = KVM_S390_ESCA_CPU_SLOTS;
if (ext == KVM_CAP_NR_VCPUS)
r = min_t(unsigned int, num_online_cpus(), r);
break;
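
User space discovers the new limits through the usual extension checks;
after the rework the ceiling is either KVM_MAX_VCPUS (no usable SCA
entries) or the ESCA slot count. For example:

	/* vm_fd: an open KVM VM file descriptor */
	int max_vcpus = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
	int nr_vcpus = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS);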
@@ -920,6 +924,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
r ? "(not available)" : "(success)");
break;
case KVM_CAP_S390_USER_OPEREXEC:
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_OPEREXEC");
kvm->arch.user_operexec = 1;
icpt_operexc_on_all_vcpus(kvm);
r = 0;
break;
default:
r = -EINVAL;
break;
@@ -1931,22 +1941,18 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
* Updates the Multiprocessor Topology-Change-Report bit to signal
* the guest with a topology change.
* This is only relevant if the topology facility is present.
*
* The SCA version, bsca or esca, doesn't matter as offset is the same.
*/
static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
{
union sca_utility new, old;
struct bsca_block *sca;
struct esca_block *sca;
read_lock(&kvm->arch.sca_lock);
sca = kvm->arch.sca;
old = READ_ONCE(sca->utility);
do {
new = old;
new.mtcr = val;
} while (!try_cmpxchg(&sca->utility.val, &old.val, new.val));
read_unlock(&kvm->arch.sca_lock);
}
static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
@@ -1967,9 +1973,7 @@ static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
if (!test_kvm_facility(kvm, 11))
return -ENXIO;
read_lock(&kvm->arch.sca_lock);
topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
read_unlock(&kvm->arch.sca_lock);
topo = kvm->arch.sca->utility.mtcr;
return put_user(topo, (u8 __user *)attr->addr);
}
@@ -2667,14 +2671,6 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
if (kvm_s390_pv_is_protected(kvm))
break;
/*
* FMT 4 SIE needs esca. As we never switch back to bsca from
* esca, we need no cleanup in the error cases below
*/
r = sca_switch_to_extended(kvm);
if (r)
break;
mmap_write_lock(kvm->mm);
r = gmap_helper_disable_cow_sharing();
mmap_write_unlock(kvm->mm);
@@ -3317,10 +3313,7 @@ static void kvm_s390_crypto_init(struct kvm *kvm)
static void sca_dispose(struct kvm *kvm)
{
if (kvm->arch.use_esca)
free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
else
free_page((unsigned long)(kvm->arch.sca));
free_pages_exact(kvm->arch.sca, sizeof(*kvm->arch.sca));
kvm->arch.sca = NULL;
}
@@ -3334,10 +3327,9 @@ void kvm_arch_free_vm(struct kvm *kvm)
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
int i, rc;
gfp_t alloc_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO;
char debug_name[16];
static unsigned long sca_offset;
int i, rc;
rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
@@ -3358,20 +3350,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (!sclp.has_64bscao)
alloc_flags |= GFP_DMA;
rwlock_init(&kvm->arch.sca_lock);
/* start with basic SCA */
kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
mutex_lock(&kvm_lock);
kvm->arch.sca = alloc_pages_exact(sizeof(*kvm->arch.sca), alloc_flags);
mutex_unlock(&kvm_lock);
if (!kvm->arch.sca)
goto out_err;
mutex_lock(&kvm_lock);
sca_offset += 16;
if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
sca_offset = 0;
kvm->arch.sca = (struct bsca_block *)
((char *) kvm->arch.sca + sca_offset);
mutex_unlock(&kvm_lock);
sprintf(debug_name, "kvm-%u", current->pid);
snprintf(debug_name, sizeof(debug_name), "kvm-%u", current->pid);
kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
if (!kvm->arch.dbf)
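
Allocating the ESCA up front replaces the old scheme of staggering each
VM's basic SCA within its page via sca_offset. alloc_pages_exact() returns
a page-aligned, physically contiguous buffer of exactly the requested size
and must be paired with free_pages_exact(), as sca_dispose() above does.
A condensed sketch of that contract:

	struct esca_block *sca;

	sca = alloc_pages_exact(sizeof(*sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!sca)
		return -ENOMEM;
	/* ... use the SCA ... */
	free_pages_exact(sca, sizeof(*sca));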
@@ -3548,133 +3534,38 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
struct esca_block *sca = vcpu->kvm->arch.sca;
if (!kvm_s390_use_sca_entries())
return;
read_lock(&vcpu->kvm->arch.sca_lock);
if (vcpu->kvm->arch.use_esca) {
struct esca_block *sca = vcpu->kvm->arch.sca;
clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
sca->cpu[vcpu->vcpu_id].sda = 0;
} else {
struct bsca_block *sca = vcpu->kvm->arch.sca;
clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
sca->cpu[vcpu->vcpu_id].sda = 0;
}
read_unlock(&vcpu->kvm->arch.sca_lock);
clear_bit_inv(vcpu->vcpu_id, (unsigned long *)sca->mcn);
sca->cpu[vcpu->vcpu_id].sda = 0;
}
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
if (!kvm_s390_use_sca_entries()) {
phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca);
struct esca_block *sca = vcpu->kvm->arch.sca;
phys_addr_t sca_phys = virt_to_phys(sca);
/* we still need the basic sca for the ipte control */
vcpu->arch.sie_block->scaoh = sca_phys >> 32;
vcpu->arch.sie_block->scaol = sca_phys;
/* we still need the sca header for the ipte control */
vcpu->arch.sie_block->scaoh = sca_phys >> 32;
vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
if (!kvm_s390_use_sca_entries())
return;
}
read_lock(&vcpu->kvm->arch.sca_lock);
if (vcpu->kvm->arch.use_esca) {
struct esca_block *sca = vcpu->kvm->arch.sca;
phys_addr_t sca_phys = virt_to_phys(sca);
sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
vcpu->arch.sie_block->scaoh = sca_phys >> 32;
vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
} else {
struct bsca_block *sca = vcpu->kvm->arch.sca;
phys_addr_t sca_phys = virt_to_phys(sca);
sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
vcpu->arch.sie_block->scaoh = sca_phys >> 32;
vcpu->arch.sie_block->scaol = sca_phys;
set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
}
read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
d->sda = s->sda;
d->sigp_ctrl.c = s->sigp_ctrl.c;
d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}
static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
int i;
d->ipte_control = s->ipte_control;
d->mcn[0] = s->mcn;
for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
static int sca_switch_to_extended(struct kvm *kvm)
{
struct bsca_block *old_sca = kvm->arch.sca;
struct esca_block *new_sca;
struct kvm_vcpu *vcpu;
unsigned long vcpu_idx;
u32 scaol, scaoh;
phys_addr_t new_sca_phys;
if (kvm->arch.use_esca)
return 0;
new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!new_sca)
return -ENOMEM;
new_sca_phys = virt_to_phys(new_sca);
scaoh = new_sca_phys >> 32;
scaol = new_sca_phys & ESCA_SCAOL_MASK;
kvm_s390_vcpu_block_all(kvm);
write_lock(&kvm->arch.sca_lock);
sca_copy_b_to_e(new_sca, old_sca);
kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
vcpu->arch.sie_block->scaoh = scaoh;
vcpu->arch.sie_block->scaol = scaol;
vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
}
kvm->arch.sca = new_sca;
kvm->arch.use_esca = 1;
write_unlock(&kvm->arch.sca_lock);
kvm_s390_vcpu_unblock_all(kvm);
free_page((unsigned long)old_sca);
VM_EVENT(kvm, 2, "Switched to ESCA (0x%p -> 0x%p)",
old_sca, kvm->arch.sca);
return 0;
set_bit_inv(vcpu->vcpu_id, (unsigned long *)sca->mcn);
sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
}
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
int rc;
if (!kvm_s390_use_sca_entries())
return id < KVM_MAX_VCPUS;
if (!kvm_s390_use_sca_entries()) {
if (id < KVM_MAX_VCPUS)
return true;
return false;
}
if (id < KVM_S390_BSCA_CPU_SLOTS)
return true;
if (!sclp.has_esca || !sclp.has_64bscao)
return false;
rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
return id < KVM_S390_ESCA_CPU_SLOTS;
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
@@ -3920,7 +3811,7 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->eca |= ECA_IB;
if (sclp.has_siif)
vcpu->arch.sie_block->eca |= ECA_SII;
if (sclp.has_sigpif)
if (kvm_s390_use_sca_entries())
vcpu->arch.sie_block->eca |= ECA_SIGPI;
if (test_kvm_facility(vcpu->kvm, 129)) {
vcpu->arch.sie_block->eca |= ECA_VX;
@@ -4367,8 +4258,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
int ret = 0;
vcpu_load(vcpu);
vcpu->run->s.regs.fpc = fpu->fpc;
@@ -4379,7 +4268,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
vcpu_put(vcpu);
return ret;
return 0;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
@@ -4787,9 +4676,6 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
if (need_resched())
schedule();
if (!kvm_is_ucontrol(vcpu->kvm)) {
rc = kvm_s390_deliver_pending_interrupts(vcpu);
if (rc || guestdbg_exit_pending(vcpu))
@@ -5074,13 +4960,8 @@ int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
* The guest_state_{enter,exit}_irqoff() functions inform lockdep and
* tracing that entry to the guest will enable host IRQs, and exit from
* the guest will disable host IRQs.
*
* We must not use lockdep/tracing/RCU in this critical section, so we
* use the low-level arch_local_irq_*() helpers to enable/disable IRQs.
*/
arch_local_irq_enable();
ret = sie64a(scb, gprs, gasce);
arch_local_irq_disable();
guest_state_exit_irqoff();
@@ -5099,12 +4980,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
*/
kvm_vcpu_srcu_read_lock(vcpu);
do {
while (true) {
rc = vcpu_pre_run(vcpu);
kvm_vcpu_srcu_read_unlock(vcpu);
if (rc || guestdbg_exit_pending(vcpu))
break;
kvm_vcpu_srcu_read_unlock(vcpu);
/*
* As PF_VCPU will be used in fault handler, between
* guest_timing_enter_irqoff and guest_timing_exit_irqoff
@@ -5116,7 +4997,17 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
sizeof(sie_page->pv_grregs));
}
xfer_to_guest_mode_check:
local_irq_disable();
xfer_to_guest_mode_prepare();
if (xfer_to_guest_mode_work_pending()) {
local_irq_enable();
rc = kvm_xfer_to_guest_mode_handle_work(vcpu);
if (rc)
break;
goto xfer_to_guest_mode_check;
}
guest_timing_enter_irqoff();
__disable_cpu_timer_accounting(vcpu);
@@ -5146,9 +5037,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
kvm_vcpu_srcu_read_lock(vcpu);
rc = vcpu_post_run(vcpu, exit_reason);
} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
if (rc || guestdbg_exit_pending(vcpu)) {
kvm_vcpu_srcu_read_unlock(vcpu);
break;
}
}
kvm_vcpu_srcu_read_unlock(vcpu);
return rc;
}
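
Both run loops (here and in vsie.c below) now lean on the generic
transfer-to-guest-mode machinery from <linux/entry-virt.h> instead of
open-coded need_resched()/signal_pending() checks. Condensed from the
hunk above (a sketch, not a drop-in replacement):

	for (;;) {
		local_irq_disable();
		xfer_to_guest_mode_prepare();
		if (!xfer_to_guest_mode_work_pending())
			break;		/* enter SIE with IRQs disabled */
		local_irq_enable();
		/* reschedules, runs signal/notify work, etc. */
		rc = kvm_xfer_to_guest_mode_handle_work(vcpu);
		if (rc)
			return rc;	/* e.g. -EINTR on a pending signal */
	}
	guest_timing_enter_irqoff();
	/* ... kvm_s390_enter_exit_sie() ... */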
@@ -5364,6 +5258,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (signal_pending(current) && !rc) {
kvm_run->exit_reason = KVM_EXIT_INTR;
vcpu->stat.signal_exits++;
rc = -EINTR;
}

arch/s390/kvm/kvm-s390.h

@@ -570,13 +570,6 @@ void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu);
int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu);
int kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
/* support for Basic/Extended SCA handling */
static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm)
{
struct bsca_block *sca = kvm->arch.sca; /* SCA version doesn't matter */
return &sca->ipte_control;
}
static inline int kvm_s390_use_sca_entries(void)
{
/*
@@ -584,7 +577,7 @@ static inline int kvm_s390_use_sca_entries(void)
* might use the entries. By not setting the entries and keeping them
* invalid, hardware will not access them but intercept.
*/
return sclp.has_sigpif;
return sclp.has_sigpif && sclp.has_esca;
}
void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
struct mcck_volatile_info *mcck_info);

arch/s390/kvm/vsie.c

@@ -782,7 +782,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu))
rc = set_validity_icpt(scb_s, 0x0011U);
else if ((gpa & PAGE_MASK) !=
((gpa + sizeof(struct bsca_block) - 1) & PAGE_MASK))
((gpa + offsetof(struct bsca_block, cpu[0]) - 1) & PAGE_MASK))
rc = set_validity_icpt(scb_s, 0x003bU);
if (!rc) {
rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
@@ -1180,12 +1180,23 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
current->thread.gmap_int_code = 0;
barrier();
if (!kvm_s390_vcpu_sie_inhibited(vcpu)) {
xfer_to_guest_mode_check:
local_irq_disable();
xfer_to_guest_mode_prepare();
if (xfer_to_guest_mode_work_pending()) {
local_irq_enable();
rc = kvm_xfer_to_guest_mode_handle_work(vcpu);
if (rc)
goto skip_sie;
goto xfer_to_guest_mode_check;
}
guest_timing_enter_irqoff();
rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce);
guest_timing_exit_irqoff();
local_irq_enable();
}
skip_sie:
barrier();
vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;
@@ -1345,13 +1356,11 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
* but rewind the PSW to re-enter SIE once that's completed
* instead of passing a "no action" intercept to the guest.
*/
if (signal_pending(current) ||
kvm_s390_vcpu_has_irq(vcpu, 0) ||
if (kvm_s390_vcpu_has_irq(vcpu, 0) ||
kvm_s390_vcpu_sie_inhibited(vcpu)) {
kvm_s390_rewind_psw(vcpu, 4);
break;
}
cond_resched();
}
if (rc == -EFAULT) {
@@ -1483,8 +1492,7 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
if (unlikely(scb_addr & 0x1ffUL))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0) ||
kvm_s390_vcpu_sie_inhibited(vcpu)) {
if (kvm_s390_vcpu_has_irq(vcpu, 0) || kvm_s390_vcpu_sie_inhibited(vcpu)) {
kvm_s390_rewind_psw(vcpu, 4);
return 0;
}
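
The explicit signal_pending() checks can go because the work loop above
runs before every (v)SIE entry and picks pending signals up there. The
relevant branch of the generic handler looks roughly like this (condensed;
the exact names live in the generic entry code):

	if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) {
		kvm_handle_signal_exit(vcpu);	/* sets KVM_EXIT_INTR */
		return -EINTR;
	}

This matches the signal_exits accounting added to the vCPU stats in this
series.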

include/uapi/linux/kvm.h

@@ -973,6 +973,7 @@ struct kvm_enable_cap {
#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243
#define KVM_CAP_GUEST_MEMFD_FLAGS 244
#define KVM_CAP_ARM_SEA_TO_USER 245
#define KVM_CAP_S390_USER_OPEREXEC 246
struct kvm_irq_routing_irqchip {
__u32 irqchip;

tools/testing/selftests/kvm/Makefile.kvm

@@ -198,6 +198,7 @@ TEST_GEN_PROGS_s390 += s390/debug_test
TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test
TEST_GEN_PROGS_s390 += s390/shared_zeropage_test
TEST_GEN_PROGS_s390 += s390/ucontrol_test
TEST_GEN_PROGS_s390 += s390/user_operexec
TEST_GEN_PROGS_s390 += rseq_test
TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON)

tools/testing/selftests/kvm/s390/user_operexec.c

@@ -0,0 +1,140 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Test operation exception forwarding.
*
* Copyright IBM Corp. 2025
*
* Authors:
* Janosch Frank <frankja@linux.ibm.com>
*/
#include "kselftest.h"
#include "kvm_util.h"
#include "test_util.h"
#include "sie.h"
#include <linux/kvm.h>
static void guest_code_instr0(void)
{
asm(".word 0x0000");
}
static void test_user_instr0(void)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
int rc;
vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0);
rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
TEST_ASSERT_EQ(0, rc);
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0);
kvm_vm_free(vm);
}
static void guest_code_user_operexec(void)
{
asm(".word 0x0807");
}
static void test_user_operexec(void)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
int rc;
vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
TEST_ASSERT_EQ(0, rc);
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
kvm_vm_free(vm);
/*
* Since user_operexec is a superset of user_instr0, it can also be
* used for the 0x0000 instruction.
*/
vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0);
rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
TEST_ASSERT_EQ(0, rc);
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0);
kvm_vm_free(vm);
}
/* combine user_instr0 and user_operexec */
static void test_user_operexec_combined(void)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
int rc;
vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
TEST_ASSERT_EQ(0, rc);
rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
TEST_ASSERT_EQ(0, rc);
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
kvm_vm_free(vm);
/* Reverse enablement order */
vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
TEST_ASSERT_EQ(0, rc);
rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
TEST_ASSERT_EQ(0, rc);
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
kvm_vm_free(vm);
}
/*
* Run all tests above.
*
* Enablement after a VCPU has already been created is covered
* implicitly, since each test enables the capability after VCPU creation.
*/
static struct testdef {
const char *name;
void (*test)(void);
} testlist[] = {
{ "instr0", test_user_instr0 },
{ "operexec", test_user_operexec },
{ "operexec_combined", test_user_operexec_combined},
};
int main(int argc, char *argv[])
{
int idx;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_USER_INSTR0));
ksft_print_header();
ksft_set_plan(ARRAY_SIZE(testlist));
for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
testlist[idx].test();
ksft_test_result_pass("%s\n", testlist[idx].name);
}
ksft_finished();
}