mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 03:24:45 +01:00
KVM SVM changes for 6.20
- Drop a user-triggerable WARN on nested_svm_load_cr3() failure.
- Add support for virtualizing ERAPS. Note, correct virtualization of ERAPS
relies on an upcoming, publicly announced change in the APM to reduce the
set of conditions where hardware (i.e. KVM) *must* flush the RAP.
- Ignore nSVM intercepts for instructions that are not supported according to
L1's virtual CPU model.
- Add support for expedited writes to the fast MMIO bus, a la VMX's fastpath
for EPT Misconfig.
- Don't set GIF when clearing EFER.SVME, as GIF exists independently of SVM,
and allow userspace to restore nested state with GIF=0.
- Treat exit_code as an unsigned 64-bit value through all of KVM.
- Add support for fetching SNP certificates from userspace.
- Fix a bug where KVM would use vmcb02 instead of vmcb01 when emulating VMLOAD
or VMSAVE on behalf of L2.
- Misc fixes and cleanups.
-----BEGIN PGP SIGNATURE-----
iQIzBAABCgAdFiEEKTobbabEP7vbhhN9OlYIJqCjN/0FAmmGsbEACgkQOlYIJqCj
N/18Iw//U9ZiNSW8k9CGRnXN/hmc8h21cNlTdGliqY3lkf0y7feCb1sEdkCFv6/U
KXlOhGUD8PiVlcJWm3ZWWMq/bJ5Ahcvyvre8RelRMQ5SRw07IojYSI1IkNHpSUBX
brEd8DBG24oaw2El+rkl6mN9fneNUAq4pZtU9QDA/ehKDxpdsym2OAUStAVjXy0R
YtIhsz0k1qX+EN/UIrvBTS6bCG3Ihd6btHgCehqGAOnY2rk5gNR0zChdKV3mdk2t
hsbpKp8rtZppZ9Ltru/ly4TYzaKT/dl9gWt7h1y78fN7XD5orenAe8MOkav3WoPI
zdDkDMzvwjv0p+bGPJKszxJrb4SBagtadvFMmKR+WZ0aYhysdAhxlpt64krqFrSV
wjfNfPQ1Z2qHb9PV4TfuBr4g+OyYZfnBcEvyJswrVHOBTfCoMn4hx4tF0bbSZdLd
nmOVqcXiPPpnOza2EXtYc97PSiHwl/CVlhXguYRPg/FQFnJKHHYoL9aRH4YpyZiK
o/7Bsqe20ouuMoRdVIt+zp8FvhOsuiHV122e6d55+bvNhUGBC4sXNDEKQlmQps4K
yvBUIGWLSx3Por/Iey7Rp+7hCXACf9KXaD1ogG2ZxL7xDE0smj9Jzu2NIzFJWUQ6
uubKwsZBJJDhYAZuDLUFmzoGydntb/Wi/FxetPp7Fzi7D4dnSUI=
=RH/c
-----END PGP SIGNATURE-----
Merge tag 'kvm-x86-svm-6.20' of https://github.com/kvm-x86/linux into HEAD
KVM SVM changes for 6.20
- Drop a user-triggerable WARN on nested_svm_load_cr3() failure.
- Add support for virtualizing ERAPS. Note, correct virtualization of ERAPS
relies on an upcoming, publicly announced change in the APM to reduce the
set of conditions where hardware (i.e. KVM) *must* flush the RAP.
- Ignore nSVM intercepts for instructions that are not supported according to
L1's virtual CPU model.
- Add support for expedited writes to the fast MMIO bus, a la VMX's fastpath
for EPT Misconfig.
- Don't set GIF when clearing EFER.SVME, as GIF exists independently of SVM,
and allow userspace to restore nested state with GIF=0.
- Treat exit_code as an unsigned 64-bit value through all of KVM.
- Add support for fetching SNP certificates from userspace.
- Fix a bug where KVM would use vmcb02 instead of vmcb01 when emulating VMLOAD
or VMSAVE on behalf of L2.
- Misc fixes and cleanups.
This commit is contained in:
commit
4215ee0d7b
22 changed files with 559 additions and 156 deletions
|
|
@ -7382,6 +7382,50 @@ Please note that the kernel is allowed to use the kvm_run structure as the
|
|||
primary storage for certain register types. Therefore, the kernel may use the
|
||||
values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set.
|
||||
|
||||
::
|
||||
|
||||
/* KVM_EXIT_SNP_REQ_CERTS */
|
||||
struct kvm_exit_snp_req_certs {
|
||||
__u64 gpa;
|
||||
__u64 npages;
|
||||
__u64 ret;
|
||||
};
|
||||
|
||||
KVM_EXIT_SNP_REQ_CERTS indicates an SEV-SNP guest with certificate-fetching
|
||||
enabled (see KVM_SEV_SNP_ENABLE_REQ_CERTS) has generated an Extended Guest
|
||||
Request NAE #VMGEXIT (SNP_GUEST_REQUEST) with message type MSG_REPORT_REQ,
|
||||
i.e. has requested an attestation report from firmware, and would like the
|
||||
certificate data corresponding to the attestation report signature to be
|
||||
provided by the hypervisor as part of the request.
|
||||
|
||||
To allow for userspace to provide the certificate, the 'gpa' and 'npages'
|
||||
are forwarded verbatim from the guest request (the RAX and RBX GHCB fields
|
||||
respectively). 'ret' is not an "output" from KVM, and is always '0' on
|
||||
exit. KVM verifies the 'gpa' is 4KiB aligned prior to exiting to userspace,
|
||||
but otherwise the information from the guest isn't validated.
|
||||
|
||||
Upon the next KVM_RUN, e.g. after userspace has serviced the request (or not),
|
||||
KVM will complete the #VMGEXIT, using the 'ret' field to determine whether to
|
||||
signal success or failure to the guest, and on failure, what reason code will
|
||||
be communicated via SW_EXITINFO2. If 'ret' is set to an unsupported value (see
|
||||
the table below), KVM_RUN will fail with -EINVAL. For a 'ret' of 'ENOSPC', KVM
|
||||
also consumes the 'npages' field, i.e. userspace can use the field to inform
|
||||
the guest of the number of pages needed to hold all the certificate data.
|
||||
|
||||
The supported 'ret' values and their respective SW_EXITINFO2 encodings:
|
||||
|
||||
====== =============================================================
|
||||
0 0x0, i.e. success. KVM will emit an SNP_GUEST_REQUEST command
|
||||
to SNP firmware.
|
||||
ENOSPC 0x0000000100000000, i.e. not enough guest pages to hold the
|
||||
certificate table and certificate data. KVM will also set the
|
||||
RBX field in the GHCB to 'npages'.
|
||||
EAGAIN 0x0000000200000000, i.e. the host is busy and the guest should
|
||||
retry the request.
|
||||
EIO 0xffffffff00000000, for all other errors (this return code is
|
||||
a KVM-defined hypervisor value, as allowed by the GHCB)
|
||||
====== =============================================================
|
||||
|
||||
|
||||
.. _cap_enable:
|
||||
|
||||
|
|
|
|||
|
|
@ -572,6 +572,52 @@ Returns: 0 on success, -negative on error
|
|||
See SNP_LAUNCH_FINISH in the SEV-SNP specification [snp-fw-abi]_ for further
|
||||
details on the input parameters in ``struct kvm_sev_snp_launch_finish``.
|
||||
|
||||
21. KVM_SEV_SNP_ENABLE_REQ_CERTS
|
||||
--------------------------------
|
||||
|
||||
The KVM_SEV_SNP_ENABLE_REQ_CERTS command will configure KVM to exit to
|
||||
userspace with a ``KVM_EXIT_SNP_REQ_CERTS`` exit type as part of handling
|
||||
a guest attestation report, which will allow userspace to provide a
|
||||
certificate corresponding to the endorsement key used by firmware to sign
|
||||
that attestation report.
|
||||
|
||||
Returns: 0 on success, -negative on error
|
||||
|
||||
NOTE: The endorsement key used by firmware may change as a result of
|
||||
management activities like updating SEV-SNP firmware or loading new
|
||||
endorsement keys, so some care should be taken to keep the returned
|
||||
certificate data in sync with the actual endorsement key in use by
|
||||
firmware at the time the attestation request is sent to SNP firmware. The
|
||||
recommended scheme to do this is to use file locking (e.g. via fcntl()'s
|
||||
F_OFD_SETLK) in the following manner:
|
||||
|
||||
- Prior to obtaining/providing certificate data as part of servicing an
|
||||
exit type of ``KVM_EXIT_SNP_REQ_CERTS``, the VMM should obtain a
|
||||
shared/read or exclusive/write lock on the certificate blob file before
|
||||
reading it and returning it to KVM, and continue to hold the lock until
|
||||
the attestation request is actually sent to firmware. To facilitate
|
||||
this, the VMM can set the ``immediate_exit`` flag of kvm_run just after
|
||||
supplying the certificate data, and just before resuming the vCPU.
|
||||
This will ensure the vCPU will exit again to userspace with ``-EINTR``
|
||||
after it finishes fetching the attestation request from firmware, at
|
||||
which point the VMM can safely drop the file lock.
|
||||
|
||||
- Tools/libraries that perform updates to SNP firmware TCB values or
|
||||
endorsement keys (e.g. via /dev/sev interfaces such as ``SNP_COMMIT``,
|
||||
``SNP_SET_CONFIG``, or ``SNP_VLEK_LOAD``, see
|
||||
Documentation/virt/coco/sev-guest.rst for more details) in such a way
|
||||
that the certificate blob needs to be updated, should similarly take an
|
||||
exclusive lock on the certificate blob for the duration of any updates
|
||||
to endorsement keys or the certificate blob contents to ensure that
|
||||
VMMs using the above scheme will not return certificate blob data that
|
||||
is out of sync with the endorsement key used by firmware at the time
|
||||
the attestation request is actually issued.
|
||||
|
||||
This scheme is recommended so that tools can use a fairly generic/natural
|
||||
approach to synchronizing firmware/certificate updates via file-locking,
|
||||
which should make it easier to maintain interoperability across
|
||||
tools/VMMs/vendors.
|
||||
|
||||
Device attribute API
|
||||
====================
|
||||
|
||||
|
|
@ -579,11 +625,15 @@ Attributes of the SEV implementation can be retrieved through the
|
|||
``KVM_HAS_DEVICE_ATTR`` and ``KVM_GET_DEVICE_ATTR`` ioctls on the ``/dev/kvm``
|
||||
device node, using group ``KVM_X86_GRP_SEV``.
|
||||
|
||||
Currently only one attribute is implemented:
|
||||
The following attributes are currently implemented:
|
||||
|
||||
* ``KVM_X86_SEV_VMSA_FEATURES``: return the set of all bits that
|
||||
are accepted in the ``vmsa_features`` of ``KVM_SEV_INIT2``.
|
||||
|
||||
* ``KVM_X86_SEV_SNP_REQ_CERTS``: return a value of 1 if the kernel supports the
|
||||
``KVM_EXIT_SNP_REQ_CERTS`` exit, which allows for fetching endorsement key
|
||||
certificates from userspace for each SNP attestation request the guest issues.
|
||||
|
||||
Firmware Management
|
||||
===================
|
||||
|
||||
|
|
|
|||
|
|
@ -472,6 +472,7 @@
|
|||
#define X86_FEATURE_GP_ON_USER_CPUID (20*32+17) /* User CPUID faulting */
|
||||
|
||||
#define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */
|
||||
#define X86_FEATURE_ERAPS (20*32+24) /* Enhanced Return Address Predictor Security */
|
||||
#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */
|
||||
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
|
||||
#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */
|
||||
|
|
|
|||
|
|
@ -195,7 +195,15 @@ enum kvm_reg {
|
|||
|
||||
VCPU_EXREG_PDPTR = NR_VCPU_REGS,
|
||||
VCPU_EXREG_CR0,
|
||||
/*
|
||||
* Alias AMD's ERAPS (not a real register) to CR3 so that common code
|
||||
* can trigger emulation of the RAP (Return Address Predictor) with
|
||||
* minimal support required in common code. Piggyback CR3 as the RAP
|
||||
* is cleared on writes to CR3, i.e. marking CR3 dirty will naturally
|
||||
* mark ERAPS dirty as well.
|
||||
*/
|
||||
VCPU_EXREG_CR3,
|
||||
VCPU_EXREG_ERAPS = VCPU_EXREG_CR3,
|
||||
VCPU_EXREG_CR4,
|
||||
VCPU_EXREG_RFLAGS,
|
||||
VCPU_EXREG_SEGMENTS,
|
||||
|
|
|
|||
|
|
@ -131,13 +131,13 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
|
|||
u64 tsc_offset;
|
||||
u32 asid;
|
||||
u8 tlb_ctl;
|
||||
u8 reserved_2[3];
|
||||
u8 erap_ctl;
|
||||
u8 reserved_2[2];
|
||||
u32 int_ctl;
|
||||
u32 int_vector;
|
||||
u32 int_state;
|
||||
u8 reserved_3[4];
|
||||
u32 exit_code;
|
||||
u32 exit_code_hi;
|
||||
u64 exit_code;
|
||||
u64 exit_info_1;
|
||||
u64 exit_info_2;
|
||||
u32 exit_int_info;
|
||||
|
|
@ -182,6 +182,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
|
|||
#define TLB_CONTROL_FLUSH_ASID 3
|
||||
#define TLB_CONTROL_FLUSH_ASID_LOCAL 7
|
||||
|
||||
#define ERAP_CONTROL_ALLOW_LARGER_RAP BIT(0)
|
||||
#define ERAP_CONTROL_CLEAR_RAP BIT(1)
|
||||
|
||||
#define V_TPR_MASK 0x0f
|
||||
|
||||
#define V_IRQ_SHIFT 8
|
||||
|
|
|
|||
|
|
@ -503,6 +503,7 @@ struct kvm_sync_regs {
|
|||
#define KVM_X86_GRP_SEV 1
|
||||
# define KVM_X86_SEV_VMSA_FEATURES 0
|
||||
# define KVM_X86_SNP_POLICY_BITS 1
|
||||
# define KVM_X86_SEV_SNP_REQ_CERTS 2
|
||||
|
||||
struct kvm_vmx_nested_state_data {
|
||||
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
|
||||
|
|
@ -743,6 +744,7 @@ enum sev_cmd_id {
|
|||
KVM_SEV_SNP_LAUNCH_START = 100,
|
||||
KVM_SEV_SNP_LAUNCH_UPDATE,
|
||||
KVM_SEV_SNP_LAUNCH_FINISH,
|
||||
KVM_SEV_SNP_ENABLE_REQ_CERTS,
|
||||
|
||||
KVM_SEV_NR_MAX,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -103,38 +103,38 @@
|
|||
#define SVM_EXIT_VMGEXIT 0x403
|
||||
|
||||
/* SEV-ES software-defined VMGEXIT events */
|
||||
#define SVM_VMGEXIT_MMIO_READ 0x80000001
|
||||
#define SVM_VMGEXIT_MMIO_WRITE 0x80000002
|
||||
#define SVM_VMGEXIT_NMI_COMPLETE 0x80000003
|
||||
#define SVM_VMGEXIT_AP_HLT_LOOP 0x80000004
|
||||
#define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005
|
||||
#define SVM_VMGEXIT_MMIO_READ 0x80000001ull
|
||||
#define SVM_VMGEXIT_MMIO_WRITE 0x80000002ull
|
||||
#define SVM_VMGEXIT_NMI_COMPLETE 0x80000003ull
|
||||
#define SVM_VMGEXIT_AP_HLT_LOOP 0x80000004ull
|
||||
#define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005ull
|
||||
#define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0
|
||||
#define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1
|
||||
#define SVM_VMGEXIT_PSC 0x80000010
|
||||
#define SVM_VMGEXIT_GUEST_REQUEST 0x80000011
|
||||
#define SVM_VMGEXIT_EXT_GUEST_REQUEST 0x80000012
|
||||
#define SVM_VMGEXIT_AP_CREATION 0x80000013
|
||||
#define SVM_VMGEXIT_PSC 0x80000010ull
|
||||
#define SVM_VMGEXIT_GUEST_REQUEST 0x80000011ull
|
||||
#define SVM_VMGEXIT_EXT_GUEST_REQUEST 0x80000012ull
|
||||
#define SVM_VMGEXIT_AP_CREATION 0x80000013ull
|
||||
#define SVM_VMGEXIT_AP_CREATE_ON_INIT 0
|
||||
#define SVM_VMGEXIT_AP_CREATE 1
|
||||
#define SVM_VMGEXIT_AP_DESTROY 2
|
||||
#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018
|
||||
#define SVM_VMGEXIT_SAVIC 0x8000001a
|
||||
#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018ull
|
||||
#define SVM_VMGEXIT_SAVIC 0x8000001aull
|
||||
#define SVM_VMGEXIT_SAVIC_REGISTER_GPA 0
|
||||
#define SVM_VMGEXIT_SAVIC_UNREGISTER_GPA 1
|
||||
#define SVM_VMGEXIT_SAVIC_SELF_GPA ~0ULL
|
||||
#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
|
||||
#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe
|
||||
#define SVM_VMGEXIT_HV_FEATURES 0x8000fffdull
|
||||
#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffeull
|
||||
#define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \
|
||||
/* SW_EXITINFO1[3:0] */ \
|
||||
(((((u64)reason_set) & 0xf)) | \
|
||||
/* SW_EXITINFO1[11:4] */ \
|
||||
((((u64)reason_code) & 0xff) << 4))
|
||||
#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff
|
||||
#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffffull
|
||||
|
||||
/* Exit code reserved for hypervisor/software use */
|
||||
#define SVM_EXIT_SW 0xf0000000
|
||||
#define SVM_EXIT_SW 0xf0000000ull
|
||||
|
||||
#define SVM_EXIT_ERR -1
|
||||
#define SVM_EXIT_ERR -1ull
|
||||
|
||||
#define SVM_EXIT_REASONS \
|
||||
{ SVM_EXIT_READ_CR0, "read_cr0" }, \
|
||||
|
|
|
|||
|
|
@ -1223,6 +1223,7 @@ void kvm_set_cpu_caps(void)
|
|||
/* PrefetchCtlMsr */
|
||||
/* GpOnUserCpuid */
|
||||
/* EPSF */
|
||||
F(ERAPS),
|
||||
SYNTHESIZED_F(SBPB),
|
||||
SYNTHESIZED_F(IBPB_BRTYPE),
|
||||
SYNTHESIZED_F(SRSO_NO),
|
||||
|
|
@ -1803,8 +1804,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
|
|||
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
|
||||
break;
|
||||
case 0x80000021:
|
||||
entry->ebx = entry->edx = 0;
|
||||
entry->edx = 0;
|
||||
cpuid_entry_override(entry, CPUID_8000_0021_EAX);
|
||||
|
||||
if (kvm_cpu_cap_has(X86_FEATURE_ERAPS))
|
||||
entry->ebx &= GENMASK(23, 16);
|
||||
else
|
||||
entry->ebx = 0;
|
||||
|
||||
cpuid_entry_override(entry, CPUID_8000_0021_ECX);
|
||||
break;
|
||||
/* AMD Extended Performance Monitoring and Debug */
|
||||
|
|
|
|||
|
|
@ -1224,13 +1224,13 @@ static bool __init avic_want_avic_enabled(void)
|
|||
* In "auto" mode, enable AVIC by default for Zen4+ if x2AVIC is
|
||||
* supported (to avoid enabling partial support by default, and because
|
||||
* x2AVIC should be supported by all Zen4+ CPUs). Explicitly check for
|
||||
* family 0x19 and later (Zen5+), as the kernel's synthetic ZenX flags
|
||||
* family 0x1A and later (Zen5+), as the kernel's synthetic ZenX flags
|
||||
* aren't inclusive of previous generations, i.e. the kernel will set
|
||||
* at most one ZenX feature flag.
|
||||
*/
|
||||
if (avic == AVIC_AUTO_MODE)
|
||||
avic = boot_cpu_has(X86_FEATURE_X2AVIC) &&
|
||||
(boot_cpu_data.x86 > 0x19 || cpu_feature_enabled(X86_FEATURE_ZEN4));
|
||||
(cpu_feature_enabled(X86_FEATURE_ZEN4) || boot_cpu_data.x86 >= 0x1A);
|
||||
|
||||
if (!avic || !npt_enabled)
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -10,8 +10,13 @@ void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu)
|
|||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
/*
|
||||
* The exit code used by Hyper-V for software-defined exits is reserved
|
||||
* by AMD specifically for such use cases.
|
||||
*/
|
||||
BUILD_BUG_ON(HV_SVM_EXITCODE_ENL != SVM_EXIT_SW);
|
||||
|
||||
svm->vmcb->control.exit_code = HV_SVM_EXITCODE_ENL;
|
||||
svm->vmcb->control.exit_code_hi = 0;
|
||||
svm->vmcb->control.exit_info_1 = HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH;
|
||||
svm->vmcb->control.exit_info_2 = 0;
|
||||
nested_svm_vmexit(svm);
|
||||
|
|
|
|||
|
|
@ -45,7 +45,6 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
|
|||
* correctly fill in the high bits of exit_info_1.
|
||||
*/
|
||||
vmcb->control.exit_code = SVM_EXIT_NPF;
|
||||
vmcb->control.exit_code_hi = 0;
|
||||
vmcb->control.exit_info_1 = (1ULL << 32);
|
||||
vmcb->control.exit_info_2 = fault->address;
|
||||
}
|
||||
|
|
@ -403,6 +402,19 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
|
|||
return __nested_vmcb_check_controls(vcpu, ctl);
|
||||
}
|
||||
|
||||
/*
|
||||
* If a feature is not advertised to L1, clear the corresponding vmcb12
|
||||
* intercept.
|
||||
*/
|
||||
#define __nested_svm_sanitize_intercept(__vcpu, __control, fname, iname) \
|
||||
do { \
|
||||
if (!guest_cpu_cap_has(__vcpu, X86_FEATURE_##fname)) \
|
||||
vmcb12_clr_intercept(__control, INTERCEPT_##iname); \
|
||||
} while (0)
|
||||
|
||||
#define nested_svm_sanitize_intercept(__vcpu, __control, name) \
|
||||
__nested_svm_sanitize_intercept(__vcpu, __control, name, name)
|
||||
|
||||
static
|
||||
void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
|
||||
struct vmcb_ctrl_area_cached *to,
|
||||
|
|
@ -413,15 +425,21 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
|
|||
for (i = 0; i < MAX_INTERCEPT; i++)
|
||||
to->intercepts[i] = from->intercepts[i];
|
||||
|
||||
__nested_svm_sanitize_intercept(vcpu, to, XSAVE, XSETBV);
|
||||
nested_svm_sanitize_intercept(vcpu, to, INVPCID);
|
||||
nested_svm_sanitize_intercept(vcpu, to, RDTSCP);
|
||||
nested_svm_sanitize_intercept(vcpu, to, SKINIT);
|
||||
nested_svm_sanitize_intercept(vcpu, to, RDPRU);
|
||||
|
||||
to->iopm_base_pa = from->iopm_base_pa;
|
||||
to->msrpm_base_pa = from->msrpm_base_pa;
|
||||
to->tsc_offset = from->tsc_offset;
|
||||
to->tlb_ctl = from->tlb_ctl;
|
||||
to->erap_ctl = from->erap_ctl;
|
||||
to->int_ctl = from->int_ctl;
|
||||
to->int_vector = from->int_vector;
|
||||
to->int_state = from->int_state;
|
||||
to->exit_code = from->exit_code;
|
||||
to->exit_code_hi = from->exit_code_hi;
|
||||
to->exit_info_1 = from->exit_info_1;
|
||||
to->exit_info_2 = from->exit_info_2;
|
||||
to->exit_int_info = from->exit_int_info;
|
||||
|
|
@ -663,7 +681,6 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
|
|||
vmcb02->save.rsp = vmcb12->save.rsp;
|
||||
vmcb02->save.rip = vmcb12->save.rip;
|
||||
|
||||
/* These bits will be set properly on the first execution when new_vmc12 is true */
|
||||
if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
|
||||
vmcb02->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
|
||||
svm->vcpu.arch.dr6 = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
|
||||
|
|
@ -727,8 +744,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
|
|||
enter_guest_mode(vcpu);
|
||||
|
||||
/*
|
||||
* Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
|
||||
* exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
|
||||
* Filled at exit: exit_code, exit_info_1, exit_info_2, exit_int_info,
|
||||
* exit_int_info_err, next_rip, insn_len, insn_bytes.
|
||||
*/
|
||||
|
||||
if (guest_cpu_cap_has(vcpu, X86_FEATURE_VGIF) &&
|
||||
|
|
@ -866,6 +883,19 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Take ALLOW_LARGER_RAP from vmcb12 even though it should be safe to
|
||||
* let L2 use a larger RAP since KVM will emulate the necessary clears,
|
||||
* as it's possible L1 deliberately wants to restrict L2 to the legacy
|
||||
* RAP size. Unconditionally clear the RAP on nested VMRUN, as KVM is
|
||||
* responsible for emulating the host vs. guest tags (L1 is the "host",
|
||||
* L2 is the "guest").
|
||||
*/
|
||||
if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS))
|
||||
vmcb02->control.erap_ctl = (svm->nested.ctl.erap_ctl &
|
||||
ERAP_CONTROL_ALLOW_LARGER_RAP) |
|
||||
ERAP_CONTROL_CLEAR_RAP;
|
||||
|
||||
/*
|
||||
* Merge guest and host intercepts - must be called with vcpu in
|
||||
* guest-mode to take effect.
|
||||
|
|
@ -985,7 +1015,6 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
|
|||
if (!nested_vmcb_check_save(vcpu) ||
|
||||
!nested_vmcb_check_controls(vcpu)) {
|
||||
vmcb12->control.exit_code = SVM_EXIT_ERR;
|
||||
vmcb12->control.exit_code_hi = -1u;
|
||||
vmcb12->control.exit_info_1 = 0;
|
||||
vmcb12->control.exit_info_2 = 0;
|
||||
goto out;
|
||||
|
|
@ -1018,7 +1047,6 @@ out_exit_err:
|
|||
svm->soft_int_injected = false;
|
||||
|
||||
svm->vmcb->control.exit_code = SVM_EXIT_ERR;
|
||||
svm->vmcb->control.exit_code_hi = -1u;
|
||||
svm->vmcb->control.exit_info_1 = 0;
|
||||
svm->vmcb->control.exit_info_2 = 0;
|
||||
|
||||
|
|
@ -1130,11 +1158,10 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
|
|||
|
||||
vmcb12->control.int_state = vmcb02->control.int_state;
|
||||
vmcb12->control.exit_code = vmcb02->control.exit_code;
|
||||
vmcb12->control.exit_code_hi = vmcb02->control.exit_code_hi;
|
||||
vmcb12->control.exit_info_1 = vmcb02->control.exit_info_1;
|
||||
vmcb12->control.exit_info_2 = vmcb02->control.exit_info_2;
|
||||
|
||||
if (vmcb12->control.exit_code != SVM_EXIT_ERR)
|
||||
if (!svm_is_vmrun_failure(vmcb12->control.exit_code))
|
||||
nested_save_pending_event_to_vmcb12(svm, vmcb12);
|
||||
|
||||
if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
|
||||
|
|
@ -1161,6 +1188,9 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
|
|||
|
||||
kvm_nested_vmexit_handle_ibrs(vcpu);
|
||||
|
||||
if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS))
|
||||
vmcb01->control.erap_ctl |= ERAP_CONTROL_CLEAR_RAP;
|
||||
|
||||
svm_switch_vmcb(svm, &svm->vmcb01);
|
||||
|
||||
/*
|
||||
|
|
@ -1363,6 +1393,8 @@ void svm_leave_nested(struct kvm_vcpu *vcpu)
|
|||
nested_svm_uninit_mmu_context(vcpu);
|
||||
vmcb_mark_all_dirty(svm->vmcb);
|
||||
|
||||
svm_set_gif(svm, true);
|
||||
|
||||
if (kvm_apicv_activated(vcpu->kvm))
|
||||
kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
|
||||
}
|
||||
|
|
@ -1422,9 +1454,12 @@ static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
|
|||
|
||||
static int nested_svm_intercept(struct vcpu_svm *svm)
|
||||
{
|
||||
u32 exit_code = svm->vmcb->control.exit_code;
|
||||
u64 exit_code = svm->vmcb->control.exit_code;
|
||||
int vmexit = NESTED_EXIT_HOST;
|
||||
|
||||
if (svm_is_vmrun_failure(exit_code))
|
||||
return NESTED_EXIT_DONE;
|
||||
|
||||
switch (exit_code) {
|
||||
case SVM_EXIT_MSR:
|
||||
vmexit = nested_svm_exit_handled_msr(svm);
|
||||
|
|
@ -1432,7 +1467,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
|
|||
case SVM_EXIT_IOIO:
|
||||
vmexit = nested_svm_intercept_ioio(svm);
|
||||
break;
|
||||
case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
|
||||
case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f:
|
||||
/*
|
||||
* Host-intercepted exceptions have been checked already in
|
||||
* nested_svm_exit_special. There is nothing to do here,
|
||||
|
|
@ -1440,15 +1475,10 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
|
|||
*/
|
||||
vmexit = NESTED_EXIT_DONE;
|
||||
break;
|
||||
}
|
||||
case SVM_EXIT_ERR: {
|
||||
vmexit = NESTED_EXIT_DONE;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
default:
|
||||
if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
|
||||
vmexit = NESTED_EXIT_DONE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return vmexit;
|
||||
|
|
@ -1496,7 +1526,6 @@ static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu)
|
|||
struct vmcb *vmcb = svm->vmcb;
|
||||
|
||||
vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + ex->vector;
|
||||
vmcb->control.exit_code_hi = 0;
|
||||
|
||||
if (ex->has_error_code)
|
||||
vmcb->control.exit_info_1 = ex->error_code;
|
||||
|
|
@ -1667,11 +1696,11 @@ static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst,
|
|||
dst->tsc_offset = from->tsc_offset;
|
||||
dst->asid = from->asid;
|
||||
dst->tlb_ctl = from->tlb_ctl;
|
||||
dst->erap_ctl = from->erap_ctl;
|
||||
dst->int_ctl = from->int_ctl;
|
||||
dst->int_vector = from->int_vector;
|
||||
dst->int_state = from->int_state;
|
||||
dst->exit_code = from->exit_code;
|
||||
dst->exit_code_hi = from->exit_code_hi;
|
||||
dst->exit_info_1 = from->exit_info_1;
|
||||
dst->exit_info_2 = from->exit_info_2;
|
||||
dst->exit_int_info = from->exit_int_info;
|
||||
|
|
@ -1782,12 +1811,12 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
|
|||
/*
|
||||
* If in guest mode, vcpu->arch.efer actually refers to the L2 guest's
|
||||
* EFER.SVME, but EFER.SVME still has to be 1 for VMRUN to succeed.
|
||||
* If SVME is disabled, the only valid states are "none" and GIF=1
|
||||
* (clearing SVME does NOT set GIF, i.e. GIF=0 is allowed).
|
||||
*/
|
||||
if (!(vcpu->arch.efer & EFER_SVME)) {
|
||||
/* GIF=1 and no guest mode are required if SVME=0. */
|
||||
if (kvm_state->flags != KVM_STATE_NESTED_GIF_SET)
|
||||
return -EINVAL;
|
||||
}
|
||||
if (!(vcpu->arch.efer & EFER_SVME) && kvm_state->flags &&
|
||||
kvm_state->flags != KVM_STATE_NESTED_GIF_SET)
|
||||
return -EINVAL;
|
||||
|
||||
/* SMM temporarily disables SVM, so we cannot be in guest mode. */
|
||||
if (is_smm(vcpu) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
|
||||
|
|
@ -1870,10 +1899,9 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
|
|||
* thus MMU might not be initialized correctly.
|
||||
* Set it again to fix this.
|
||||
*/
|
||||
|
||||
ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
|
||||
nested_npt_enabled(svm), false);
|
||||
if (WARN_ON_ONCE(ret))
|
||||
if (ret)
|
||||
goto out_free;
|
||||
|
||||
svm->nested.force_msr_bitmap_recalc = true;
|
||||
|
|
|
|||
|
|
@ -41,6 +41,16 @@
|
|||
|
||||
#define GHCB_HV_FT_SUPPORTED (GHCB_HV_FT_SNP | GHCB_HV_FT_SNP_AP_CREATION)
|
||||
|
||||
/*
|
||||
* The GHCB spec essentially states that all non-zero error codes other than
|
||||
* those explicitly defined above should be treated as an error by the guest.
|
||||
* Define a generic error to cover that case, and choose a value that is not
|
||||
* likely to overlap with new explicit error codes should more be added to
|
||||
* the GHCB spec later. KVM will use this to report generic errors when
|
||||
* handling SNP guest requests.
|
||||
*/
|
||||
#define SNP_GUEST_VMM_ERR_GENERIC (~0U)
|
||||
|
||||
/* enable/disable SEV support */
|
||||
static bool sev_enabled = true;
|
||||
module_param_named(sev, sev_enabled, bool, 0444);
|
||||
|
|
@ -53,11 +63,6 @@ module_param_named(sev_es, sev_es_enabled, bool, 0444);
|
|||
static bool sev_snp_enabled = true;
|
||||
module_param_named(sev_snp, sev_snp_enabled, bool, 0444);
|
||||
|
||||
/* enable/disable SEV-ES DebugSwap support */
|
||||
static bool sev_es_debug_swap_enabled = true;
|
||||
module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444);
|
||||
static u64 sev_supported_vmsa_features;
|
||||
|
||||
static unsigned int nr_ciphertext_hiding_asids;
|
||||
module_param_named(ciphertext_hiding_asids, nr_ciphertext_hiding_asids, uint, 0444);
|
||||
|
||||
|
|
@ -84,6 +89,8 @@ module_param_named(ciphertext_hiding_asids, nr_ciphertext_hiding_asids, uint, 04
|
|||
|
||||
static u64 snp_supported_policy_bits __ro_after_init;
|
||||
|
||||
static u64 sev_supported_vmsa_features __ro_after_init;
|
||||
|
||||
#define INITIAL_VMSA_GPA 0xFFFFFFFFF000
|
||||
|
||||
static u8 sev_enc_bit;
|
||||
|
|
@ -2151,6 +2158,9 @@ int sev_dev_get_attr(u32 group, u64 attr, u64 *val)
|
|||
*val = snp_supported_policy_bits;
|
||||
return 0;
|
||||
|
||||
case KVM_X86_SEV_SNP_REQ_CERTS:
|
||||
*val = sev_snp_enabled ? 1 : 0;
|
||||
return 0;
|
||||
default:
|
||||
return -ENXIO;
|
||||
}
|
||||
|
|
@ -2567,6 +2577,16 @@ e_free:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int snp_enable_certs(struct kvm *kvm)
|
||||
{
|
||||
if (kvm->created_vcpus || !sev_snp_guest(kvm))
|
||||
return -EINVAL;
|
||||
|
||||
to_kvm_sev_info(kvm)->snp_certs_enabled = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
|
||||
{
|
||||
struct kvm_sev_cmd sev_cmd;
|
||||
|
|
@ -2672,6 +2692,9 @@ int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
|
|||
case KVM_SEV_SNP_LAUNCH_FINISH:
|
||||
r = snp_launch_finish(kvm, &sev_cmd);
|
||||
break;
|
||||
case KVM_SEV_SNP_ENABLE_REQ_CERTS:
|
||||
r = snp_enable_certs(kvm);
|
||||
break;
|
||||
default:
|
||||
r = -EINVAL;
|
||||
goto out;
|
||||
|
|
@ -3150,12 +3173,10 @@ out:
|
|||
sev_es_enabled = sev_es_supported;
|
||||
sev_snp_enabled = sev_snp_supported;
|
||||
|
||||
if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) ||
|
||||
!cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP))
|
||||
sev_es_debug_swap_enabled = false;
|
||||
|
||||
sev_supported_vmsa_features = 0;
|
||||
if (sev_es_debug_swap_enabled)
|
||||
|
||||
if (sev_es_enabled && cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) &&
|
||||
cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP))
|
||||
sev_supported_vmsa_features |= SVM_SEV_FEAT_DEBUG_SWAP;
|
||||
|
||||
if (sev_snp_enabled && tsc_khz && cpu_feature_enabled(X86_FEATURE_SNP_SECURE_TSC))
|
||||
|
|
@ -3275,11 +3296,6 @@ skip_vmsa_free:
|
|||
kvfree(svm->sev_es.ghcb_sa);
|
||||
}
|
||||
|
||||
static u64 kvm_get_cached_sw_exit_code(struct vmcb_control_area *control)
|
||||
{
|
||||
return (((u64)control->exit_code_hi) << 32) | control->exit_code;
|
||||
}
|
||||
|
||||
static void dump_ghcb(struct vcpu_svm *svm)
|
||||
{
|
||||
struct vmcb_control_area *control = &svm->vmcb->control;
|
||||
|
|
@ -3301,7 +3317,7 @@ static void dump_ghcb(struct vcpu_svm *svm)
|
|||
*/
|
||||
pr_err("GHCB (GPA=%016llx) snapshot:\n", svm->vmcb->control.ghcb_gpa);
|
||||
pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
|
||||
kvm_get_cached_sw_exit_code(control), kvm_ghcb_sw_exit_code_is_valid(svm));
|
||||
control->exit_code, kvm_ghcb_sw_exit_code_is_valid(svm));
|
||||
pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
|
||||
control->exit_info_1, kvm_ghcb_sw_exit_info_1_is_valid(svm));
|
||||
pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
|
||||
|
|
@ -3335,7 +3351,6 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
|
|||
struct vmcb_control_area *control = &svm->vmcb->control;
|
||||
struct kvm_vcpu *vcpu = &svm->vcpu;
|
||||
struct ghcb *ghcb = svm->sev_es.ghcb;
|
||||
u64 exit_code;
|
||||
|
||||
/*
|
||||
* The GHCB protocol so far allows for the following data
|
||||
|
|
@ -3369,9 +3384,7 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
|
|||
__kvm_emulate_msr_write(vcpu, MSR_IA32_XSS, kvm_ghcb_get_xss(svm));
|
||||
|
||||
/* Copy the GHCB exit information into the VMCB fields */
|
||||
exit_code = kvm_ghcb_get_sw_exit_code(svm);
|
||||
control->exit_code = lower_32_bits(exit_code);
|
||||
control->exit_code_hi = upper_32_bits(exit_code);
|
||||
control->exit_code = kvm_ghcb_get_sw_exit_code(svm);
|
||||
control->exit_info_1 = kvm_ghcb_get_sw_exit_info_1(svm);
|
||||
control->exit_info_2 = kvm_ghcb_get_sw_exit_info_2(svm);
|
||||
svm->sev_es.sw_scratch = kvm_ghcb_get_sw_scratch_if_valid(svm);
|
||||
|
|
@ -3384,15 +3397,8 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
|
|||
{
|
||||
struct vmcb_control_area *control = &svm->vmcb->control;
|
||||
struct kvm_vcpu *vcpu = &svm->vcpu;
|
||||
u64 exit_code;
|
||||
u64 reason;
|
||||
|
||||
/*
|
||||
* Retrieve the exit code now even though it may not be marked valid
|
||||
* as it could help with debugging.
|
||||
*/
|
||||
exit_code = kvm_get_cached_sw_exit_code(control);
|
||||
|
||||
/* Only GHCB Usage code 0 is supported */
|
||||
if (svm->sev_es.ghcb->ghcb_usage) {
|
||||
reason = GHCB_ERR_INVALID_USAGE;
|
||||
|
|
@ -3406,7 +3412,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
|
|||
!kvm_ghcb_sw_exit_info_2_is_valid(svm))
|
||||
goto vmgexit_err;
|
||||
|
||||
switch (exit_code) {
|
||||
switch (control->exit_code) {
|
||||
case SVM_EXIT_READ_DR7:
|
||||
break;
|
||||
case SVM_EXIT_WRITE_DR7:
|
||||
|
|
@ -3507,15 +3513,19 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
|
|||
return 0;
|
||||
|
||||
vmgexit_err:
|
||||
/*
|
||||
* Print the exit code even though it may not be marked valid as it
|
||||
* could help with debugging.
|
||||
*/
|
||||
if (reason == GHCB_ERR_INVALID_USAGE) {
|
||||
vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
|
||||
svm->sev_es.ghcb->ghcb_usage);
|
||||
} else if (reason == GHCB_ERR_INVALID_EVENT) {
|
||||
vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n",
|
||||
exit_code);
|
||||
control->exit_code);
|
||||
} else {
|
||||
vcpu_unimpl(vcpu, "vmgexit: exit code %#llx input is not valid\n",
|
||||
exit_code);
|
||||
control->exit_code);
|
||||
dump_ghcb(svm);
|
||||
}
|
||||
|
||||
|
|
@ -4155,6 +4165,36 @@ out_unlock:
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Fail a certificate request: write SNP_GUEST_ERR(@vmm_error, 0) into the
 * GHCB's SW_EXITINFO2 field and tell the caller to resume the guest.
 */
static int snp_req_certs_err(struct vcpu_svm *svm, u32 vmm_error)
{
	struct ghcb *ghcb = svm->sev_es.ghcb;

	ghcb_set_sw_exit_info_2(ghcb, SNP_GUEST_ERR(vmm_error, 0));

	/* A return value of 1 means "resume guest". */
	return 1;
}
|
||||
|
||||
static int snp_complete_req_certs(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
struct vmcb_control_area *control = &svm->vmcb->control;
|
||||
|
||||
switch (READ_ONCE(vcpu->run->snp_req_certs.ret)) {
|
||||
case 0:
|
||||
return snp_handle_guest_req(svm, control->exit_info_1,
|
||||
control->exit_info_2);
|
||||
case ENOSPC:
|
||||
vcpu->arch.regs[VCPU_REGS_RBX] = vcpu->run->snp_req_certs.npages;
|
||||
return snp_req_certs_err(svm, SNP_GUEST_VMM_ERR_INVALID_LEN);
|
||||
case EAGAIN:
|
||||
return snp_req_certs_err(svm, SNP_GUEST_VMM_ERR_BUSY);
|
||||
case EIO:
|
||||
return snp_req_certs_err(svm, SNP_GUEST_VMM_ERR_GENERIC);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int snp_handle_ext_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa)
|
||||
{
|
||||
struct kvm *kvm = svm->vcpu.kvm;
|
||||
|
|
@ -4170,14 +4210,15 @@ static int snp_handle_ext_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t r
|
|||
/*
|
||||
* As per GHCB spec, requests of type MSG_REPORT_REQ also allow for
|
||||
* additional certificate data to be provided alongside the attestation
|
||||
* report via the guest-provided data pages indicated by RAX/RBX. The
|
||||
* certificate data is optional and requires additional KVM enablement
|
||||
* to provide an interface for userspace to provide it, but KVM still
|
||||
* needs to be able to handle extended guest requests either way. So
|
||||
* provide a stub implementation that will always return an empty
|
||||
* certificate table in the guest-provided data pages.
|
||||
* report via the guest-provided data pages indicated by RAX/RBX. If
|
||||
* userspace enables KVM_EXIT_SNP_REQ_CERTS, then exit to userspace
|
||||
* to give userspace an opportunity to provide the certificate data
|
||||
* before issuing/completing the attestation request. Otherwise, return
|
||||
* an empty certificate table in the guest-provided data pages and
|
||||
* handle the attestation request immediately.
|
||||
*/
|
||||
if (msg_type == SNP_MSG_REPORT_REQ) {
|
||||
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
|
||||
struct kvm_vcpu *vcpu = &svm->vcpu;
|
||||
u64 data_npages;
|
||||
gpa_t data_gpa;
|
||||
|
|
@ -4191,6 +4232,15 @@ static int snp_handle_ext_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t r
|
|||
if (!PAGE_ALIGNED(data_gpa))
|
||||
goto request_invalid;
|
||||
|
||||
if (sev->snp_certs_enabled) {
|
||||
vcpu->run->exit_reason = KVM_EXIT_SNP_REQ_CERTS;
|
||||
vcpu->run->snp_req_certs.gpa = data_gpa;
|
||||
vcpu->run->snp_req_certs.npages = data_npages;
|
||||
vcpu->run->snp_req_certs.ret = 0;
|
||||
vcpu->arch.complete_userspace_io = snp_complete_req_certs;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* As per GHCB spec (see "SNP Extended Guest Request"), the
|
||||
* certificate table is terminated by 24-bytes of zeroes.
|
||||
|
|
@ -4354,7 +4404,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
|
|||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
struct vmcb_control_area *control = &svm->vmcb->control;
|
||||
u64 ghcb_gpa, exit_code;
|
||||
u64 ghcb_gpa;
|
||||
int ret;
|
||||
|
||||
/* Validate the GHCB */
|
||||
|
|
@ -4396,8 +4446,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
|
|||
|
||||
svm_vmgexit_success(svm, 0);
|
||||
|
||||
exit_code = kvm_get_cached_sw_exit_code(control);
|
||||
switch (exit_code) {
|
||||
switch (control->exit_code) {
|
||||
case SVM_VMGEXIT_MMIO_READ:
|
||||
ret = setup_vmgexit_scratch(svm, true, control->exit_info_2);
|
||||
if (ret)
|
||||
|
|
@ -4489,7 +4538,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
|
|||
ret = -EINVAL;
|
||||
break;
|
||||
default:
|
||||
ret = svm_invoke_exit_handler(vcpu, exit_code);
|
||||
ret = svm_invoke_exit_handler(vcpu, control->exit_code);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
|
|
|||
|
|
@ -215,7 +215,6 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
|
|||
if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) {
|
||||
if (!(efer & EFER_SVME)) {
|
||||
svm_leave_nested(vcpu);
|
||||
svm_set_gif(svm, true);
|
||||
/* #GP intercept is still needed for vmware backdoor */
|
||||
if (!enable_vmware_backdoor)
|
||||
clr_exception_intercept(svm, GP_VECTOR);
|
||||
|
|
@ -996,10 +995,14 @@ static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu)
|
|||
svm_set_intercept(svm, INTERCEPT_RDTSCP);
|
||||
}
|
||||
|
||||
/*
|
||||
* No need to toggle VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK here, it is
|
||||
* always set if vls is enabled. If the intercepts are set, the bit is
|
||||
* meaningless anyway.
|
||||
*/
|
||||
if (guest_cpuid_is_intel_compatible(vcpu)) {
|
||||
svm_set_intercept(svm, INTERCEPT_VMLOAD);
|
||||
svm_set_intercept(svm, INTERCEPT_VMSAVE);
|
||||
svm->vmcb->control.virt_ext &= ~VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
|
||||
} else {
|
||||
/*
|
||||
* If hardware supports Virtual VMLOAD VMSAVE then enable it
|
||||
|
|
@ -1008,7 +1011,6 @@ static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu)
|
|||
if (vls) {
|
||||
svm_clr_intercept(svm, INTERCEPT_VMLOAD);
|
||||
svm_clr_intercept(svm, INTERCEPT_VMSAVE);
|
||||
svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1141,6 +1143,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu, bool init_event)
|
|||
svm_clr_intercept(svm, INTERCEPT_PAUSE);
|
||||
}
|
||||
|
||||
if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS))
|
||||
svm->vmcb->control.erap_ctl |= ERAP_CONTROL_ALLOW_LARGER_RAP;
|
||||
|
||||
if (kvm_vcpu_apicv_active(vcpu))
|
||||
avic_init_vmcb(svm, vmcb);
|
||||
|
||||
|
|
@ -1153,6 +1158,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu, bool init_event)
|
|||
svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
|
||||
}
|
||||
|
||||
if (vls)
|
||||
svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
|
||||
|
||||
if (vcpu->kvm->arch.bus_lock_detection_enabled)
|
||||
svm_set_intercept(svm, INTERCEPT_BUSLOCK);
|
||||
|
||||
|
|
@ -1862,13 +1870,16 @@ static int pf_interception(struct kvm_vcpu *vcpu)
|
|||
svm->vmcb->control.insn_len);
|
||||
}
|
||||
|
||||
static int svm_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
|
||||
void *insn, int insn_len);
|
||||
|
||||
static int npf_interception(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
int rc;
|
||||
|
||||
u64 fault_address = svm->vmcb->control.exit_info_2;
|
||||
u64 error_code = svm->vmcb->control.exit_info_1;
|
||||
gpa_t gpa = svm->vmcb->control.exit_info_2;
|
||||
|
||||
/*
|
||||
* WARN if hardware generates a fault with an error code that collides
|
||||
|
|
@ -1879,17 +1890,35 @@ static int npf_interception(struct kvm_vcpu *vcpu)
|
|||
if (WARN_ON_ONCE(error_code & PFERR_SYNTHETIC_MASK))
|
||||
error_code &= ~PFERR_SYNTHETIC_MASK;
|
||||
|
||||
/*
|
||||
* Expedite fast MMIO kicks if the next RIP is known and KVM is allowed
|
||||
* to emulate a page fault, e.g. skipping the current instruction is wrong
|
||||
* if the #NPF occurred while vectoring an event.
|
||||
*/
|
||||
if ((error_code & PFERR_RSVD_MASK) && !is_guest_mode(vcpu)) {
|
||||
const int emul_type = EMULTYPE_PF | EMULTYPE_NO_DECODE;
|
||||
|
||||
if (svm_check_emulate_instruction(vcpu, emul_type, NULL, 0))
|
||||
return 1;
|
||||
|
||||
if (nrips && svm->vmcb->control.next_rip &&
|
||||
!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
|
||||
trace_kvm_fast_mmio(gpa);
|
||||
return kvm_skip_emulated_instruction(vcpu);
|
||||
}
|
||||
}
|
||||
|
||||
if (sev_snp_guest(vcpu->kvm) && (error_code & PFERR_GUEST_ENC_MASK))
|
||||
error_code |= PFERR_PRIVATE_ACCESS;
|
||||
|
||||
trace_kvm_page_fault(vcpu, fault_address, error_code);
|
||||
rc = kvm_mmu_page_fault(vcpu, fault_address, error_code,
|
||||
trace_kvm_page_fault(vcpu, gpa, error_code);
|
||||
rc = kvm_mmu_page_fault(vcpu, gpa, error_code,
|
||||
static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
|
||||
svm->vmcb->control.insn_bytes : NULL,
|
||||
svm->vmcb->control.insn_len);
|
||||
|
||||
if (rc > 0 && error_code & PFERR_GUEST_RMP_MASK)
|
||||
sev_handle_rmp_fault(vcpu, fault_address, error_code);
|
||||
sev_handle_rmp_fault(vcpu, gpa, error_code);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
|
@ -2099,12 +2128,13 @@ static int vmload_vmsave_interception(struct kvm_vcpu *vcpu, bool vmload)
|
|||
|
||||
ret = kvm_skip_emulated_instruction(vcpu);
|
||||
|
||||
/* KVM always performs VMLOAD/VMSAVE on VMCB01 (see __svm_vcpu_run()) */
|
||||
if (vmload) {
|
||||
svm_copy_vmloadsave_state(svm->vmcb, vmcb12);
|
||||
svm_copy_vmloadsave_state(svm->vmcb01.ptr, vmcb12);
|
||||
svm->sysenter_eip_hi = 0;
|
||||
svm->sysenter_esp_hi = 0;
|
||||
} else {
|
||||
svm_copy_vmloadsave_state(vmcb12, svm->vmcb);
|
||||
svm_copy_vmloadsave_state(vmcb12, svm->vmcb01.ptr);
|
||||
}
|
||||
|
||||
kvm_vcpu_unmap(vcpu, &map);
|
||||
|
|
@ -2443,7 +2473,6 @@ static bool check_selective_cr0_intercepted(struct kvm_vcpu *vcpu,
|
|||
|
||||
if (cr0 ^ val) {
|
||||
svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
|
||||
svm->vmcb->control.exit_code_hi = 0;
|
||||
ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
|
||||
}
|
||||
|
||||
|
|
@ -3272,10 +3301,11 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
|
|||
pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
|
||||
pr_err("%-20s%d\n", "asid:", control->asid);
|
||||
pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
|
||||
pr_err("%-20s%d\n", "erap_ctl:", control->erap_ctl);
|
||||
pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
|
||||
pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
|
||||
pr_err("%-20s%08x\n", "int_state:", control->int_state);
|
||||
pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
|
||||
pr_err("%-20s%016llx\n", "exit_code:", control->exit_code);
|
||||
pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
|
||||
pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
|
||||
pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
|
||||
|
|
@ -3443,23 +3473,21 @@ no_vmsa:
|
|||
sev_free_decrypted_vmsa(vcpu, save);
|
||||
}
|
||||
|
||||
static bool svm_check_exit_valid(u64 exit_code)
|
||||
int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 __exit_code)
|
||||
{
|
||||
return (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
|
||||
svm_exit_handlers[exit_code]);
|
||||
}
|
||||
u32 exit_code = __exit_code;
|
||||
|
||||
static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
|
||||
{
|
||||
dump_vmcb(vcpu);
|
||||
kvm_prepare_unexpected_reason_exit(vcpu, exit_code);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code)
|
||||
{
|
||||
if (!svm_check_exit_valid(exit_code))
|
||||
return svm_handle_invalid_exit(vcpu, exit_code);
|
||||
/*
|
||||
* SVM uses negative values, i.e. 64-bit values, to indicate that VMRUN
|
||||
* failed. Report all such errors to userspace (note, VMEXIT_INVALID,
|
||||
* a.k.a. SVM_EXIT_ERR, is special cased by svm_handle_exit()). Skip
|
||||
* the check when running as a VM, as KVM has historically left garbage
|
||||
* in bits 63:32, i.e. running KVM-on-KVM would hit false positives if
|
||||
* the underlying kernel is buggy.
|
||||
*/
|
||||
if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR) &&
|
||||
(u64)exit_code != __exit_code)
|
||||
goto unexpected_vmexit;
|
||||
|
||||
#ifdef CONFIG_MITIGATION_RETPOLINE
|
||||
if (exit_code == SVM_EXIT_MSR)
|
||||
|
|
@ -3477,7 +3505,19 @@ int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code)
|
|||
return sev_handle_vmgexit(vcpu);
|
||||
#endif
|
||||
#endif
|
||||
if (exit_code >= ARRAY_SIZE(svm_exit_handlers))
|
||||
goto unexpected_vmexit;
|
||||
|
||||
exit_code = array_index_nospec(exit_code, ARRAY_SIZE(svm_exit_handlers));
|
||||
if (!svm_exit_handlers[exit_code])
|
||||
goto unexpected_vmexit;
|
||||
|
||||
return svm_exit_handlers[exit_code](vcpu);
|
||||
|
||||
unexpected_vmexit:
|
||||
dump_vmcb(vcpu);
|
||||
kvm_prepare_unexpected_reason_exit(vcpu, __exit_code);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void svm_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
|
||||
|
|
@ -3516,7 +3556,6 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
|
|||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
struct kvm_run *kvm_run = vcpu->run;
|
||||
u32 exit_code = svm->vmcb->control.exit_code;
|
||||
|
||||
/* SEV-ES guests must use the CR write traps to track CR registers. */
|
||||
if (!sev_es_guest(vcpu->kvm)) {
|
||||
|
|
@ -3540,7 +3579,7 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
|
|||
return 1;
|
||||
}
|
||||
|
||||
if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
|
||||
if (svm_is_vmrun_failure(svm->vmcb->control.exit_code)) {
|
||||
kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
|
||||
kvm_run->fail_entry.hardware_entry_failure_reason
|
||||
= svm->vmcb->control.exit_code;
|
||||
|
|
@ -3552,7 +3591,7 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
|
|||
if (exit_fastpath != EXIT_FASTPATH_NONE)
|
||||
return 1;
|
||||
|
||||
return svm_invoke_exit_handler(vcpu, exit_code);
|
||||
return svm_invoke_exit_handler(vcpu, svm->vmcb->control.exit_code);
|
||||
}
|
||||
|
||||
static int pre_svm_run(struct kvm_vcpu *vcpu)
|
||||
|
|
@ -3983,6 +4022,13 @@ static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
|
|||
invlpga(gva, svm->vmcb->control.asid);
|
||||
}
|
||||
|
||||
static void svm_flush_tlb_guest(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_register_mark_dirty(vcpu, VCPU_EXREG_ERAPS);
|
||||
|
||||
svm_flush_tlb_asid(vcpu);
|
||||
}
|
||||
|
||||
static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
|
@ -4241,6 +4287,10 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
|
|||
}
|
||||
svm->vmcb->save.cr2 = vcpu->arch.cr2;
|
||||
|
||||
if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS) &&
|
||||
kvm_register_is_dirty(vcpu, VCPU_EXREG_ERAPS))
|
||||
svm->vmcb->control.erap_ctl |= ERAP_CONTROL_CLEAR_RAP;
|
||||
|
||||
svm_hv_update_vp_id(svm->vmcb, vcpu);
|
||||
|
||||
/*
|
||||
|
|
@ -4311,13 +4361,21 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
|
|||
|
||||
/* Track VMRUNs that have made it past consistency checking */
|
||||
if (svm->nested.nested_run_pending &&
|
||||
svm->vmcb->control.exit_code != SVM_EXIT_ERR)
|
||||
!svm_is_vmrun_failure(svm->vmcb->control.exit_code))
|
||||
++vcpu->stat.nested_run;
|
||||
|
||||
svm->nested.nested_run_pending = 0;
|
||||
}
|
||||
|
||||
svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
|
||||
|
||||
/*
|
||||
* Unconditionally mask off the CLEAR_RAP bit, the AND is just as cheap
|
||||
* as the TEST+Jcc to avoid it.
|
||||
*/
|
||||
if (cpu_feature_enabled(X86_FEATURE_ERAPS))
|
||||
svm->vmcb->control.erap_ctl &= ~ERAP_CONTROL_CLEAR_RAP;
|
||||
|
||||
vmcb_mark_all_clean(svm->vmcb);
|
||||
|
||||
/* if exit due to PF check for async PF */
|
||||
|
|
@ -4618,7 +4676,6 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
|
|||
if (static_cpu_has(X86_FEATURE_NRIPS))
|
||||
vmcb->control.next_rip = info->next_rip;
|
||||
vmcb->control.exit_code = icpt_info.exit_code;
|
||||
vmcb->control.exit_code_hi = 0;
|
||||
vmexit = nested_svm_exit_handled(svm);
|
||||
|
||||
ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
|
||||
|
|
@ -5073,7 +5130,7 @@ struct kvm_x86_ops svm_x86_ops __initdata = {
|
|||
.flush_tlb_all = svm_flush_tlb_all,
|
||||
.flush_tlb_current = svm_flush_tlb_current,
|
||||
.flush_tlb_gva = svm_flush_tlb_gva,
|
||||
.flush_tlb_guest = svm_flush_tlb_asid,
|
||||
.flush_tlb_guest = svm_flush_tlb_guest,
|
||||
|
||||
.vcpu_pre_run = svm_vcpu_pre_run,
|
||||
.vcpu_run = svm_vcpu_run,
|
||||
|
|
|
|||
|
|
@ -115,6 +115,7 @@ struct kvm_sev_info {
|
|||
void *guest_resp_buf; /* Bounce buffer for SNP Guest Request output */
|
||||
struct mutex guest_req_mutex; /* Must acquire before using bounce buffers */
|
||||
cpumask_var_t have_run_cpus; /* CPUs that have done VMRUN for this VM. */
|
||||
bool snp_certs_enabled; /* SNP certificate-fetching support. */
|
||||
};
|
||||
|
||||
struct kvm_svm {
|
||||
|
|
@ -156,11 +157,11 @@ struct vmcb_ctrl_area_cached {
|
|||
u64 tsc_offset;
|
||||
u32 asid;
|
||||
u8 tlb_ctl;
|
||||
u8 erap_ctl;
|
||||
u32 int_ctl;
|
||||
u32 int_vector;
|
||||
u32 int_state;
|
||||
u32 exit_code;
|
||||
u32 exit_code_hi;
|
||||
u64 exit_code;
|
||||
u64 exit_info_1;
|
||||
u64 exit_info_2;
|
||||
u32 exit_int_info;
|
||||
|
|
@ -424,6 +425,14 @@ static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
|
|||
return container_of(vcpu, struct vcpu_svm, vcpu);
|
||||
}
|
||||
|
||||
/*
 * Return true if @exit_code is SVM_EXIT_ERR, i.e. VMRUN itself failed.
 *
 * On bare metal the full 64-bit value is compared.  When KVM itself runs as
 * a guest (X86_FEATURE_HYPERVISOR), only the low 32 bits are compared,
 * presumably because the underlying hypervisor may leave garbage in
 * bits 63:32 of the exit code.
 */
static inline bool svm_is_vmrun_failure(u64 exit_code)
{
	if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
		return exit_code == SVM_EXIT_ERR;

	return (u32)exit_code == (u32)SVM_EXIT_ERR;
}
|
||||
|
||||
/*
|
||||
* Only the PDPTRs are loaded on demand into the shadow MMU. All other
|
||||
* fields are synchronized on VM-Exit, because accessing the VMCB is cheap.
|
||||
|
|
@ -434,28 +443,47 @@ static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
|
|||
*/
|
||||
#define SVM_REGS_LAZY_LOAD_SET (1 << VCPU_EXREG_PDPTR)
|
||||
|
||||
static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
|
||||
static inline void __vmcb_set_intercept(unsigned long *intercepts, u32 bit)
|
||||
{
|
||||
WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
|
||||
__set_bit(bit, (unsigned long *)&control->intercepts);
|
||||
__set_bit(bit, intercepts);
|
||||
}
|
||||
|
||||
/*
 * Clear intercept @bit in the raw @intercepts bitmap.  Warns (once) if @bit
 * lies outside the 32 * MAX_INTERCEPT bits of the bitmap.
 */
static inline void __vmcb_clr_intercept(unsigned long *intercepts, u32 bit)
{
	WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);

	__clear_bit(bit, intercepts);
}
|
||||
|
||||
/*
 * Test whether intercept @bit is set in the raw @intercepts bitmap.  Warns
 * (once) if @bit lies outside the 32 * MAX_INTERCEPT bits of the bitmap.
 */
static inline bool __vmcb_is_intercept(unsigned long *intercepts, u32 bit)
{
	WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);

	return test_bit(bit, intercepts);
}
|
||||
|
||||
/* Set intercept @bit in the intercept bitmap of a VMCB control area. */
static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
{
	unsigned long *intercepts = (unsigned long *)&control->intercepts;

	__vmcb_set_intercept(intercepts, bit);
}
|
||||
|
||||
static inline void vmcb_clr_intercept(struct vmcb_control_area *control, u32 bit)
|
||||
{
|
||||
WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
|
||||
__clear_bit(bit, (unsigned long *)&control->intercepts);
|
||||
__vmcb_clr_intercept((unsigned long *)&control->intercepts, bit);
|
||||
}
|
||||
|
||||
static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit)
|
||||
{
|
||||
WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
|
||||
return test_bit(bit, (unsigned long *)&control->intercepts);
|
||||
return __vmcb_is_intercept((unsigned long *)&control->intercepts, bit);
|
||||
}
|
||||
|
||||
/* Clear intercept @bit in the intercept bitmap of a cached vmcb12 control area. */
static inline void vmcb12_clr_intercept(struct vmcb_ctrl_area_cached *control, u32 bit)
{
	unsigned long *intercepts = (unsigned long *)&control->intercepts;

	__vmcb_clr_intercept(intercepts, bit);
}
|
||||
|
||||
static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u32 bit)
|
||||
{
|
||||
WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
|
||||
return test_bit(bit, (unsigned long *)&control->intercepts);
|
||||
return __vmcb_is_intercept((unsigned long *)&control->intercepts, bit);
|
||||
}
|
||||
|
||||
static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
|
||||
|
|
@ -762,7 +790,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm);
|
|||
static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
|
||||
{
|
||||
svm->vmcb->control.exit_code = exit_code;
|
||||
svm->vmcb->control.exit_code_hi = 0;
|
||||
svm->vmcb->control.exit_info_1 = 0;
|
||||
svm->vmcb->control.exit_info_2 = 0;
|
||||
return nested_svm_vmexit(svm);
|
||||
|
|
|
|||
|
|
@ -383,10 +383,10 @@ TRACE_EVENT(kvm_apic,
|
|||
#define kvm_print_exit_reason(exit_reason, isa) \
|
||||
(isa == KVM_ISA_VMX) ? \
|
||||
__print_symbolic(exit_reason & 0xffff, VMX_EXIT_REASONS) : \
|
||||
__print_symbolic(exit_reason, SVM_EXIT_REASONS), \
|
||||
__print_symbolic_u64(exit_reason, SVM_EXIT_REASONS), \
|
||||
(isa == KVM_ISA_VMX && exit_reason & ~0xffff) ? " " : "", \
|
||||
(isa == KVM_ISA_VMX) ? \
|
||||
__print_flags(exit_reason & ~0xffff, " ", VMX_EXIT_REASON_FLAGS) : ""
|
||||
__print_flags_u64(exit_reason & ~0xffff, " ", VMX_EXIT_REASON_FLAGS) : ""
|
||||
|
||||
#define TRACE_EVENT_KVM_EXIT(name) \
|
||||
TRACE_EVENT(name, \
|
||||
|
|
@ -781,7 +781,7 @@ TRACE_EVENT_KVM_EXIT(kvm_nested_vmexit);
|
|||
* Tracepoint for #VMEXIT reinjected to the guest
|
||||
*/
|
||||
TRACE_EVENT(kvm_nested_vmexit_inject,
|
||||
TP_PROTO(__u32 exit_code,
|
||||
TP_PROTO(__u64 exit_code,
|
||||
__u64 exit_info1, __u64 exit_info2,
|
||||
__u32 exit_int_info, __u32 exit_int_info_err, __u32 isa),
|
||||
TP_ARGS(exit_code, exit_info1, exit_info2,
|
||||
|
|
|
|||
|
|
@ -14143,6 +14143,13 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
|
|||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* When ERAPS is supported, invalidating a specific PCID clears
|
||||
* the RAP (Return Address Predicator).
|
||||
*/
|
||||
if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS))
|
||||
kvm_register_is_dirty(vcpu, VCPU_EXREG_ERAPS);
|
||||
|
||||
kvm_invalidate_pcid(vcpu, operand.pcid);
|
||||
return kvm_skip_emulated_instruction(vcpu);
|
||||
|
||||
|
|
@ -14156,6 +14163,11 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
|
|||
|
||||
fallthrough;
|
||||
case INVPCID_TYPE_ALL_INCL_GLOBAL:
|
||||
/*
|
||||
* Don't bother marking VCPU_EXREG_ERAPS dirty, SVM will take
|
||||
* care of doing so when emulating the full guest TLB flush
|
||||
* (the RAP is cleared on all implicit TLB flushes).
|
||||
*/
|
||||
kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
|
||||
return kvm_skip_emulated_instruction(vcpu);
|
||||
|
||||
|
|
|
|||
|
|
@ -281,7 +281,7 @@ struct hv_vmcb_enlightenments {
|
|||
#define HV_VMCB_NESTED_ENLIGHTENMENTS 31
|
||||
|
||||
/* Synthetic VM-Exit */
|
||||
#define HV_SVM_EXITCODE_ENL 0xf0000000
|
||||
#define HV_SVM_EXITCODE_ENL 0xf0000000ull
|
||||
#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH (1)
|
||||
|
||||
/* VM_PARTITION_ASSIST_PAGE */
|
||||
|
|
|
|||
|
|
@ -135,6 +135,12 @@ struct kvm_xen_exit {
|
|||
} u;
|
||||
};
|
||||
|
||||
/*
 * Exit payload for KVM_EXIT_SNP_REQ_CERTS (kvm_run.snp_req_certs).
 *
 * @gpa:    guest physical address of the guest-provided data pages.
 * @npages: number of guest-provided data pages; read back by KVM when
 *          userspace reports ENOSPC in @ret.
 * @ret:    set to 0 by KVM before exiting; on completion KVM accepts
 *          0, ENOSPC, EAGAIN or EIO here.
 */
struct kvm_exit_snp_req_certs {
	__u64 gpa;
	__u64 npages;
	__u64 ret;
};
|
||||
|
||||
#define KVM_S390_GET_SKEYS_NONE 1
|
||||
#define KVM_S390_SKEYS_MAX 1048576
|
||||
|
||||
|
|
@ -181,6 +187,7 @@ struct kvm_xen_exit {
|
|||
#define KVM_EXIT_TDX 40
|
||||
#define KVM_EXIT_ARM_SEA 41
|
||||
#define KVM_EXIT_ARM_LDST64B 42
|
||||
#define KVM_EXIT_SNP_REQ_CERTS 43
|
||||
|
||||
/* For KVM_EXIT_INTERNAL_ERROR */
|
||||
/* Emulate instruction failed. */
|
||||
|
|
@ -483,6 +490,8 @@ struct kvm_run {
|
|||
__u64 gva;
|
||||
__u64 gpa;
|
||||
} arm_sea;
|
||||
/* KVM_EXIT_SNP_REQ_CERTS */
|
||||
struct kvm_exit_snp_req_certs snp_req_certs;
|
||||
/* Fix the size of the union. */
|
||||
char padding[256];
|
||||
};
|
||||
|
|
|
|||
|
|
@ -93,6 +93,7 @@ TEST_GEN_PROGS_x86 += x86/nested_dirty_log_test
|
|||
TEST_GEN_PROGS_x86 += x86/nested_emulation_test
|
||||
TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
|
||||
TEST_GEN_PROGS_x86 += x86/nested_invalid_cr3_test
|
||||
TEST_GEN_PROGS_x86 += x86/nested_set_state_test
|
||||
TEST_GEN_PROGS_x86 += x86/nested_tsc_adjust_test
|
||||
TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test
|
||||
TEST_GEN_PROGS_x86 += x86/nested_vmsave_vmload_test
|
||||
|
|
@ -121,7 +122,6 @@ TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state
|
|||
TEST_GEN_PROGS_x86 += x86/vmx_msrs_test
|
||||
TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state
|
||||
TEST_GEN_PROGS_x86 += x86/vmx_nested_la57_state_test
|
||||
TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test
|
||||
TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test
|
||||
TEST_GEN_PROGS_x86 += x86/xapic_ipi_test
|
||||
TEST_GEN_PROGS_x86 += x86/xapic_state_test
|
||||
|
|
|
|||
|
|
@ -92,8 +92,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
|
|||
u32 int_vector;
|
||||
u32 int_state;
|
||||
u8 reserved_3[4];
|
||||
u32 exit_code;
|
||||
u32 exit_code_hi;
|
||||
u64 exit_code;
|
||||
u64 exit_info_1;
|
||||
u64 exit_info_2;
|
||||
u32 exit_int_info;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* vmx_set_nested_state_test
|
||||
*
|
||||
* Copyright (C) 2019, Google LLC.
|
||||
*
|
||||
* This test verifies the integrity of calling the ioctl KVM_SET_NESTED_STATE.
|
||||
|
|
@ -11,6 +9,7 @@
|
|||
#include "kvm_util.h"
|
||||
#include "processor.h"
|
||||
#include "vmx.h"
|
||||
#include "svm_util.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <linux/kvm.h>
|
||||
|
|
@ -241,8 +240,108 @@ void test_vmx_nested_state(struct kvm_vcpu *vcpu)
|
|||
TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
|
||||
"Size must be between %ld and %d. The size returned was %d.",
|
||||
sizeof(*state), state_sz, state->size);
|
||||
TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull.");
|
||||
TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull.");
|
||||
|
||||
TEST_ASSERT_EQ(state->hdr.vmx.vmxon_pa, -1ull);
|
||||
TEST_ASSERT_EQ(state->hdr.vmx.vmcs12_pa, -1ull);
|
||||
TEST_ASSERT_EQ(state->flags, 0);
|
||||
|
||||
free(state);
|
||||
}
|
||||
|
||||
static void vcpu_efer_enable_svm(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
uint64_t old_efer = vcpu_get_msr(vcpu, MSR_EFER);
|
||||
|
||||
vcpu_set_msr(vcpu, MSR_EFER, old_efer | EFER_SVME);
|
||||
}
|
||||
|
||||
static void vcpu_efer_disable_svm(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
uint64_t old_efer = vcpu_get_msr(vcpu, MSR_EFER);
|
||||
|
||||
vcpu_set_msr(vcpu, MSR_EFER, old_efer & ~EFER_SVME);
|
||||
}
|
||||
|
||||
void set_default_svm_state(struct kvm_nested_state *state, int size)
|
||||
{
|
||||
memset(state, 0, size);
|
||||
state->format = 1;
|
||||
state->size = size;
|
||||
state->hdr.svm.vmcb_pa = 0x3000;
|
||||
}
|
||||
|
||||
void test_svm_nested_state(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* Add a page for VMCB. */
|
||||
const int state_sz = sizeof(struct kvm_nested_state) + getpagesize();
|
||||
struct kvm_nested_state *state =
|
||||
(struct kvm_nested_state *)malloc(state_sz);
|
||||
|
||||
vcpu_set_cpuid_feature(vcpu, X86_FEATURE_SVM);
|
||||
|
||||
/* The format must be set to 1. 0 for VMX, 1 for SVM. */
|
||||
set_default_svm_state(state, state_sz);
|
||||
state->format = 0;
|
||||
test_nested_state_expect_einval(vcpu, state);
|
||||
|
||||
/* Invalid flags are rejected, KVM_STATE_NESTED_EVMCS is VMX-only */
|
||||
set_default_svm_state(state, state_sz);
|
||||
state->flags = KVM_STATE_NESTED_EVMCS;
|
||||
test_nested_state_expect_einval(vcpu, state);
|
||||
|
||||
/*
|
||||
* If EFER.SVME is clear, guest mode is disallowed and GIF can be set or
|
||||
* cleared.
|
||||
*/
|
||||
vcpu_efer_disable_svm(vcpu);
|
||||
|
||||
set_default_svm_state(state, state_sz);
|
||||
state->flags = KVM_STATE_NESTED_GUEST_MODE;
|
||||
test_nested_state_expect_einval(vcpu, state);
|
||||
|
||||
state->flags = 0;
|
||||
test_nested_state(vcpu, state);
|
||||
|
||||
state->flags = KVM_STATE_NESTED_GIF_SET;
|
||||
test_nested_state(vcpu, state);
|
||||
|
||||
/* Enable SVM in the guest EFER. */
|
||||
vcpu_efer_enable_svm(vcpu);
|
||||
|
||||
/* Setting vmcb_pa to a non-aligned address is only fine when not entering guest mode */
|
||||
set_default_svm_state(state, state_sz);
|
||||
state->hdr.svm.vmcb_pa = -1ull;
|
||||
state->flags = 0;
|
||||
test_nested_state(vcpu, state);
|
||||
state->flags = KVM_STATE_NESTED_GUEST_MODE;
|
||||
test_nested_state_expect_einval(vcpu, state);
|
||||
|
||||
/*
|
||||
* Size must be large enough to fit kvm_nested_state and VMCB
|
||||
* only when entering guest mode.
|
||||
*/
|
||||
set_default_svm_state(state, state_sz/2);
|
||||
state->flags = 0;
|
||||
test_nested_state(vcpu, state);
|
||||
state->flags = KVM_STATE_NESTED_GUEST_MODE;
|
||||
test_nested_state_expect_einval(vcpu, state);
|
||||
|
||||
/*
|
||||
* Test that if we leave nesting the state reflects that when we get it
|
||||
* again, except for vmcb_pa, which is always returned as 0 when not in
|
||||
* guest mode.
|
||||
*/
|
||||
set_default_svm_state(state, state_sz);
|
||||
state->hdr.svm.vmcb_pa = -1ull;
|
||||
state->flags = KVM_STATE_NESTED_GIF_SET;
|
||||
test_nested_state(vcpu, state);
|
||||
vcpu_nested_state_get(vcpu, state);
|
||||
TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
|
||||
"Size must be between %ld and %d. The size returned was %d.",
|
||||
sizeof(*state), state_sz, state->size);
|
||||
|
||||
TEST_ASSERT_EQ(state->hdr.svm.vmcb_pa, 0);
|
||||
TEST_ASSERT_EQ(state->flags, KVM_STATE_NESTED_GIF_SET);
|
||||
|
||||
free(state);
|
||||
}
|
||||
|
|
@ -255,20 +354,20 @@ int main(int argc, char *argv[])
|
|||
|
||||
have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
|
||||
|
||||
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
|
||||
kvm_cpu_has(X86_FEATURE_SVM));
|
||||
TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
|
||||
|
||||
/*
|
||||
* AMD currently does not implement set_nested_state, so for now we
|
||||
* just early out.
|
||||
*/
|
||||
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
|
||||
|
||||
vm = vm_create_with_one_vcpu(&vcpu, NULL);
|
||||
|
||||
/*
|
||||
* First run tests with VMX disabled to check error handling.
|
||||
* First run tests with VMX/SVM disabled to check error handling.
|
||||
* test_{vmx/svm}_nested_state() will re-enable as needed.
|
||||
*/
|
||||
vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
|
||||
if (kvm_cpu_has(X86_FEATURE_VMX))
|
||||
vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
|
||||
else
|
||||
vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_SVM);
|
||||
|
||||
/* Passing a NULL kvm_nested_state causes a EFAULT. */
|
||||
test_nested_state_expect_efault(vcpu, NULL);
|
||||
|
|
@ -297,7 +396,10 @@ int main(int argc, char *argv[])
|
|||
state.flags = KVM_STATE_NESTED_RUN_PENDING;
|
||||
test_nested_state_expect_einval(vcpu, &state);
|
||||
|
||||
test_vmx_nested_state(vcpu);
|
||||
if (kvm_cpu_has(X86_FEATURE_VMX))
|
||||
test_vmx_nested_state(vcpu);
|
||||
else
|
||||
test_svm_nested_state(vcpu);
|
||||
|
||||
kvm_vm_free(vm);
|
||||
return 0;
|
||||
|
|
@ -103,7 +103,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
|
|||
|
||||
run_guest(vmcb, svm->vmcb_gpa);
|
||||
__GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
|
||||
"Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'",
|
||||
"Expected VMMCAL #VMEXIT, got '0x%lx', info1 = '0x%lx, info2 = '0x%lx'",
|
||||
vmcb->control.exit_code,
|
||||
vmcb->control.exit_info_1, vmcb->control.exit_info_2);
|
||||
|
||||
|
|
@ -133,7 +133,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
|
|||
|
||||
run_guest(vmcb, svm->vmcb_gpa);
|
||||
__GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
|
||||
"Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'",
|
||||
"Expected HLT #VMEXIT, got '0x%lx', info1 = '0x%lx, info2 = '0x%lx'",
|
||||
vmcb->control.exit_code,
|
||||
vmcb->control.exit_info_1, vmcb->control.exit_info_2);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue