mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 03:24:45 +01:00
- gmap rewrite: completely new memory management for kvm/s390
- vSIE improvement - maintainership change for s390 vfio-pci - small quality of life improvement for protected guests -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEoWuZBM6M3lCBSfTnuARItAMU6BMFAmmLOKAACgkQuARItAMU 6BOkhQ/9G0Sr1bfkcSQvbszvjSoDsOCSm+oAm39679Dr4v+7SsItknTutYK0M7dM n6oY2kU1dveFsF0FwBAALh4LC0lYNEmz34eHxaFPFfgqQ9lX2fBAQuBSPq8uOz1L Pk6IIlqTls8TAvKF/KqTXCEULnPGVXD2KP4WaSir+T2vZr6N/mqB7cZPR23/rMAi +PF/UtmgbfG+eRjqA8QRdm8nnzTrM5cWe9roZXYsAXDLZh+EYYeG4d96GFTV8udY /6mV1YKP0Aa+youC5p4oIh1Iv7p/Yjv6RxPoEbW1O31M9yBDJmFpz4W5C/rdkzwI nOjQj9i7ZINXf83kAZMvFb8MdXlJzaw8rBUlWzxNfrsL4ga8Rp3xMNEdufWd3T5x zNFXr0ANuBifi0B0EasBWlYDRbK4WGAC4vnkgmxqP5t2JiAN+d0FXB8LaRyZvgs/ tiwEDenCk1eDWEBcWbLnX7fGGDKDUNXVMAFTrGM1BMNZe6/IL/h/sypLuYSJ/d3Y VXDgZZyAWUVqjidDxrwurdjyzvbPd69GDbKjhTuUu4OdqUMucjjQf74w6m857Wn/ 9oLoR0p+8deb1SQ2RuB8sujcJiO9YHczwL8PLDa+bGw3jH6TRiMVVrt1HOw5QmfG QpwhKvrF2yPTgv5VZbFvYEvtITnBBfaepQe97pDDEfsHqPeStmI= =2eiy -----END PGP SIGNATURE----- Merge tag 'kvm-s390-next-7.0-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD - gmap rewrite: completely new memory management for kvm/s390 - vSIE improvement - maintainership change for s390 vfio-pci - small quality of life improvement for protected guests
This commit is contained in:
commit
b1195183ed
51 changed files with 5930 additions and 5512 deletions
|
|
@ -6518,6 +6518,40 @@ the capability to be present.
|
|||
|
||||
`flags` must currently be zero.
|
||||
|
||||
4.144 KVM_S390_KEYOP
|
||||
--------------------
|
||||
|
||||
:Capability: KVM_CAP_S390_KEYOP
|
||||
:Architectures: s390
|
||||
:Type: vm ioctl
|
||||
:Parameters: struct kvm_s390_keyop (in/out)
|
||||
:Returns: 0 in case of success, < 0 on error
|
||||
|
||||
The specified key operation is performed on the given guest address. The
|
||||
previous storage key (or the relevant part thereof) will be returned in
|
||||
`key`.
|
||||
|
||||
::
|
||||
|
||||
struct kvm_s390_keyop {
|
||||
__u64 guest_addr;
|
||||
__u8 key;
|
||||
__u8 operation;
|
||||
};
|
||||
|
||||
Currently supported values for ``operation``:
|
||||
|
||||
KVM_S390_KEYOP_ISKE
|
||||
Returns the storage key for the guest address ``guest_addr`` in ``key``.
|
||||
|
||||
KVM_S390_KEYOP_RRBE
|
||||
Resets the reference bit for the guest address ``guest_addr``, returning the
|
||||
R and C bits of the old storage key in ``key``; the remaining fields of
|
||||
the storage key will be set to 0.
|
||||
|
||||
KVM_S390_KEYOP_SSKE
|
||||
Sets the storage key for the guest address ``guest_addr`` to the key
|
||||
specified in ``key``, returning the previous value in ``key``.
|
||||
|
||||
.. _kvm_run:
|
||||
|
||||
|
|
@ -9384,6 +9418,14 @@ The presence of this capability indicates that KVM_RUN will update the
|
|||
KVM_RUN_X86_GUEST_MODE bit in kvm_run.flags to indicate whether the
|
||||
vCPU was executing nested guest code when it exited.
|
||||
|
||||
8.46 KVM_CAP_S390_KEYOP
|
||||
-----------------------
|
||||
|
||||
:Architectures: s390
|
||||
|
||||
The presence of this capability indicates that the KVM_S390_KEYOP ioctl is
|
||||
available.
|
||||
|
||||
KVM exits with the register state of either the L1 or L2 guest
|
||||
depending on which executed at the time of an exit. Userspace must
|
||||
take care to differentiate between these cases.
|
||||
|
|
|
|||
|
|
@ -13914,14 +13914,12 @@ L: kvm@vger.kernel.org
|
|||
S: Supported
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git
|
||||
F: Documentation/virt/kvm/s390*
|
||||
F: arch/s390/include/asm/gmap.h
|
||||
F: arch/s390/include/asm/gmap_helpers.h
|
||||
F: arch/s390/include/asm/kvm*
|
||||
F: arch/s390/include/uapi/asm/kvm*
|
||||
F: arch/s390/include/uapi/asm/uvdevice.h
|
||||
F: arch/s390/kernel/uv.c
|
||||
F: arch/s390/kvm/
|
||||
F: arch/s390/mm/gmap.c
|
||||
F: arch/s390/mm/gmap_helpers.c
|
||||
F: drivers/s390/char/uvdevice.c
|
||||
F: tools/testing/selftests/drivers/s390x/uvdevice/
|
||||
|
|
@ -23111,7 +23109,8 @@ F: include/uapi/linux/vfio_ccw.h
|
|||
|
||||
S390 VFIO-PCI DRIVER
|
||||
M: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
M: Eric Farman <farman@linux.ibm.com>
|
||||
M: Farhan Ali <alifm@linux.ibm.com>
|
||||
R: Eric Farman <farman@linux.ibm.com>
|
||||
L: linux-s390@vger.kernel.org
|
||||
L: kvm@vger.kernel.org
|
||||
S: Supported
|
||||
|
|
|
|||
|
|
@ -32,9 +32,6 @@ config GENERIC_BUG_RELATIVE_POINTERS
|
|||
config GENERIC_LOCKBREAK
|
||||
def_bool y if PREEMPTION
|
||||
|
||||
config PGSTE
|
||||
def_bool y if KVM
|
||||
|
||||
config AUDIT_ARCH
|
||||
def_bool y
|
||||
|
||||
|
|
|
|||
|
|
@ -9,6 +9,32 @@
|
|||
#ifndef _S390_DAT_BITS_H
|
||||
#define _S390_DAT_BITS_H
|
||||
|
||||
/*
|
||||
* vaddress union in order to easily decode a virtual address into its
|
||||
* region first index, region second index etc. parts.
|
||||
*/
|
||||
union vaddress {
|
||||
unsigned long addr;
|
||||
struct {
|
||||
unsigned long rfx : 11;
|
||||
unsigned long rsx : 11;
|
||||
unsigned long rtx : 11;
|
||||
unsigned long sx : 11;
|
||||
unsigned long px : 8;
|
||||
unsigned long bx : 12;
|
||||
};
|
||||
struct {
|
||||
unsigned long rfx01 : 2;
|
||||
unsigned long : 9;
|
||||
unsigned long rsx01 : 2;
|
||||
unsigned long : 9;
|
||||
unsigned long rtx01 : 2;
|
||||
unsigned long : 9;
|
||||
unsigned long sx01 : 2;
|
||||
unsigned long : 29;
|
||||
};
|
||||
};
|
||||
|
||||
union asce {
|
||||
unsigned long val;
|
||||
struct {
|
||||
|
|
@ -98,7 +124,8 @@ union region3_table_entry {
|
|||
struct {
|
||||
unsigned long : 53;
|
||||
unsigned long fc: 1; /* Format-Control */
|
||||
unsigned long : 4;
|
||||
unsigned long p : 1; /* DAT-Protection Bit */
|
||||
unsigned long : 3;
|
||||
unsigned long i : 1; /* Region-Invalid Bit */
|
||||
unsigned long cr: 1; /* Common-Region Bit */
|
||||
unsigned long tt: 2; /* Table-Type Bits */
|
||||
|
|
@ -140,7 +167,8 @@ union segment_table_entry {
|
|||
struct {
|
||||
unsigned long : 53;
|
||||
unsigned long fc: 1; /* Format-Control */
|
||||
unsigned long : 4;
|
||||
unsigned long p : 1; /* DAT-Protection Bit */
|
||||
unsigned long : 3;
|
||||
unsigned long i : 1; /* Segment-Invalid Bit */
|
||||
unsigned long cs: 1; /* Common-Segment Bit */
|
||||
unsigned long tt: 2; /* Table-Type Bits */
|
||||
|
|
|
|||
|
|
@ -1,174 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* KVM guest address space mapping code
|
||||
*
|
||||
* Copyright IBM Corp. 2007, 2016
|
||||
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
|
||||
*/
|
||||
|
||||
#ifndef _ASM_S390_GMAP_H
|
||||
#define _ASM_S390_GMAP_H
|
||||
|
||||
#include <linux/radix-tree.h>
|
||||
#include <linux/refcount.h>
|
||||
|
||||
/* Generic bits for GMAP notification on DAT table entry changes. */
|
||||
#define GMAP_NOTIFY_SHADOW 0x2
|
||||
#define GMAP_NOTIFY_MPROT 0x1
|
||||
|
||||
/* Status bits only for huge segment entries */
|
||||
#define _SEGMENT_ENTRY_GMAP_IN 0x0800 /* invalidation notify bit */
|
||||
#define _SEGMENT_ENTRY_GMAP_UC 0x0002 /* dirty (migration) */
|
||||
|
||||
/**
|
||||
* struct gmap_struct - guest address space
|
||||
* @list: list head for the mm->context gmap list
|
||||
* @mm: pointer to the parent mm_struct
|
||||
* @guest_to_host: radix tree with guest to host address translation
|
||||
* @host_to_guest: radix tree with pointer to segment table entries
|
||||
* @guest_table_lock: spinlock to protect all entries in the guest page table
|
||||
* @ref_count: reference counter for the gmap structure
|
||||
* @table: pointer to the page directory
|
||||
* @asce: address space control element for gmap page table
|
||||
* @pfault_enabled: defines if pfaults are applicable for the guest
|
||||
* @guest_handle: protected virtual machine handle for the ultravisor
|
||||
* @host_to_rmap: radix tree with gmap_rmap lists
|
||||
* @children: list of shadow gmap structures
|
||||
* @shadow_lock: spinlock to protect the shadow gmap list
|
||||
* @parent: pointer to the parent gmap for shadow guest address spaces
|
||||
* @orig_asce: ASCE for which the shadow page table has been created
|
||||
* @edat_level: edat level to be used for the shadow translation
|
||||
* @removed: flag to indicate if a shadow guest address space has been removed
|
||||
* @initialized: flag to indicate if a shadow guest address space can be used
|
||||
*/
|
||||
struct gmap {
|
||||
struct list_head list;
|
||||
struct mm_struct *mm;
|
||||
struct radix_tree_root guest_to_host;
|
||||
struct radix_tree_root host_to_guest;
|
||||
spinlock_t guest_table_lock;
|
||||
refcount_t ref_count;
|
||||
unsigned long *table;
|
||||
unsigned long asce;
|
||||
unsigned long asce_end;
|
||||
void *private;
|
||||
bool pfault_enabled;
|
||||
/* only set for protected virtual machines */
|
||||
unsigned long guest_handle;
|
||||
/* Additional data for shadow guest address spaces */
|
||||
struct radix_tree_root host_to_rmap;
|
||||
struct list_head children;
|
||||
spinlock_t shadow_lock;
|
||||
struct gmap *parent;
|
||||
unsigned long orig_asce;
|
||||
int edat_level;
|
||||
bool removed;
|
||||
bool initialized;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct gmap_rmap - reverse mapping for shadow page table entries
|
||||
* @next: pointer to next rmap in the list
|
||||
* @raddr: virtual rmap address in the shadow guest address space
|
||||
*/
|
||||
struct gmap_rmap {
|
||||
struct gmap_rmap *next;
|
||||
unsigned long raddr;
|
||||
};
|
||||
|
||||
#define gmap_for_each_rmap(pos, head) \
|
||||
for (pos = (head); pos; pos = pos->next)
|
||||
|
||||
#define gmap_for_each_rmap_safe(pos, n, head) \
|
||||
for (pos = (head); n = pos ? pos->next : NULL, pos; pos = n)
|
||||
|
||||
/**
|
||||
* struct gmap_notifier - notify function block for page invalidation
|
||||
* @notifier_call: address of callback function
|
||||
*/
|
||||
struct gmap_notifier {
|
||||
struct list_head list;
|
||||
struct rcu_head rcu;
|
||||
void (*notifier_call)(struct gmap *gmap, unsigned long start,
|
||||
unsigned long end);
|
||||
};
|
||||
|
||||
static inline int gmap_is_shadow(struct gmap *gmap)
|
||||
{
|
||||
return !!gmap->parent;
|
||||
}
|
||||
|
||||
struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit);
|
||||
void gmap_remove(struct gmap *gmap);
|
||||
struct gmap *gmap_get(struct gmap *gmap);
|
||||
void gmap_put(struct gmap *gmap);
|
||||
void gmap_free(struct gmap *gmap);
|
||||
struct gmap *gmap_alloc(unsigned long limit);
|
||||
|
||||
int gmap_map_segment(struct gmap *gmap, unsigned long from,
|
||||
unsigned long to, unsigned long len);
|
||||
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
|
||||
unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
|
||||
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
|
||||
void __gmap_zap(struct gmap *, unsigned long gaddr);
|
||||
void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);
|
||||
|
||||
int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val);
|
||||
|
||||
void gmap_unshadow(struct gmap *sg);
|
||||
int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
|
||||
int fake);
|
||||
int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
|
||||
int fake);
|
||||
int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
|
||||
int fake);
|
||||
int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
|
||||
int fake);
|
||||
int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte);
|
||||
|
||||
void gmap_register_pte_notifier(struct gmap_notifier *);
|
||||
void gmap_unregister_pte_notifier(struct gmap_notifier *);
|
||||
|
||||
int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits);
|
||||
|
||||
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
|
||||
unsigned long gaddr, unsigned long vmaddr);
|
||||
int s390_replace_asce(struct gmap *gmap);
|
||||
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
|
||||
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
|
||||
unsigned long end, bool interruptible);
|
||||
unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level);
|
||||
|
||||
/**
|
||||
* s390_uv_destroy_range - Destroy a range of pages in the given mm.
|
||||
* @mm: the mm on which to operate on
|
||||
* @start: the start of the range
|
||||
* @end: the end of the range
|
||||
*
|
||||
* This function will call cond_sched, so it should not generate stalls, but
|
||||
* it will otherwise only return when it completed.
|
||||
*/
|
||||
static inline void s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
(void)__s390_uv_destroy_range(mm, start, end, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* s390_uv_destroy_range_interruptible - Destroy a range of pages in the
|
||||
* given mm, but stop when a fatal signal is received.
|
||||
* @mm: the mm on which to operate on
|
||||
* @start: the start of the range
|
||||
* @end: the end of the range
|
||||
*
|
||||
* This function will call cond_sched, so it should not generate stalls. If
|
||||
* a fatal signal is received, it will return with -EINTR immediately,
|
||||
* without finishing destroying the whole range. Upon successful
|
||||
* completion, 0 is returned.
|
||||
*/
|
||||
static inline int s390_uv_destroy_range_interruptible(struct mm_struct *mm, unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
return __s390_uv_destroy_range(mm, start, end, true);
|
||||
}
|
||||
#endif /* _ASM_S390_GMAP_H */
|
||||
|
|
@ -11,5 +11,6 @@
|
|||
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr);
|
||||
void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end);
|
||||
int gmap_helper_disable_cow_sharing(void);
|
||||
void gmap_helper_try_set_pte_unused(struct mm_struct *mm, unsigned long vmaddr);
|
||||
|
||||
#endif /* _ASM_S390_GMAP_HELPERS_H */
|
||||
|
|
|
|||
|
|
@ -37,12 +37,6 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
|
|||
return __huge_ptep_get_and_clear(mm, addr, ptep);
|
||||
}
|
||||
|
||||
static inline void arch_clear_hugetlb_flags(struct folio *folio)
|
||||
{
|
||||
clear_bit(PG_arch_1, &folio->flags.f);
|
||||
}
|
||||
#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags
|
||||
|
||||
#define __HAVE_ARCH_HUGE_PTE_CLEAR
|
||||
static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, unsigned long sz)
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@
|
|||
#include <asm/isc.h>
|
||||
#include <asm/guarded_storage.h>
|
||||
|
||||
#define KVM_HAVE_MMU_RWLOCK
|
||||
#define KVM_MAX_VCPUS 255
|
||||
|
||||
#define KVM_INTERNAL_MEM_SLOTS 1
|
||||
|
|
@ -441,6 +442,7 @@ struct kvm_vcpu_arch {
|
|||
bool acrs_loaded;
|
||||
struct kvm_s390_pv_vcpu pv;
|
||||
union diag318_info diag318_info;
|
||||
struct kvm_s390_mmu_cache *mc;
|
||||
};
|
||||
|
||||
struct kvm_vm_stat {
|
||||
|
|
@ -630,8 +632,12 @@ struct kvm_s390_pv {
|
|||
void *set_aside;
|
||||
struct list_head need_cleanup;
|
||||
struct mmu_notifier mmu_notifier;
|
||||
/* Protects against concurrent import-like operations */
|
||||
struct mutex import_lock;
|
||||
};
|
||||
|
||||
struct kvm_s390_mmu_cache;
|
||||
|
||||
struct kvm_arch {
|
||||
struct esca_block *sca;
|
||||
debug_info_t *dbf;
|
||||
|
|
@ -671,6 +677,7 @@ struct kvm_arch {
|
|||
struct kvm_s390_pv pv;
|
||||
struct list_head kzdev_list;
|
||||
spinlock_t kzdev_list_lock;
|
||||
struct kvm_s390_mmu_cache *mc;
|
||||
};
|
||||
|
||||
#define KVM_HVA_ERR_BAD (-1UL)
|
||||
|
|
|
|||
|
|
@ -18,24 +18,11 @@ typedef struct {
|
|||
unsigned long vdso_base;
|
||||
/* The mmu context belongs to a secure guest. */
|
||||
atomic_t protected_count;
|
||||
/*
|
||||
* The following bitfields need a down_write on the mm
|
||||
* semaphore when they are written to. As they are only
|
||||
* written once, they can be read without a lock.
|
||||
*/
|
||||
/* The mmu context uses extended page tables. */
|
||||
unsigned int has_pgste:1;
|
||||
/* The mmu context uses storage keys. */
|
||||
unsigned int uses_skeys:1;
|
||||
/* The mmu context uses CMM. */
|
||||
unsigned int uses_cmm:1;
|
||||
/*
|
||||
* The mmu context allows COW-sharing of memory pages (KSM, zeropage).
|
||||
* Note that COW-sharing during fork() is currently always allowed.
|
||||
*/
|
||||
unsigned int allow_cow_sharing:1;
|
||||
/* The gmaps associated with this context are allowed to use huge pages. */
|
||||
unsigned int allow_gmap_hpage_1m:1;
|
||||
} mm_context_t;
|
||||
|
||||
#define INIT_MM_CONTEXT(name) \
|
||||
|
|
|
|||
|
|
@ -29,12 +29,8 @@ static inline int init_new_context(struct task_struct *tsk,
|
|||
atomic_set(&mm->context.protected_count, 0);
|
||||
mm->context.gmap_asce = 0;
|
||||
mm->context.flush_mm = 0;
|
||||
#ifdef CONFIG_PGSTE
|
||||
mm->context.has_pgste = 0;
|
||||
mm->context.uses_skeys = 0;
|
||||
mm->context.uses_cmm = 0;
|
||||
#if IS_ENABLED(CONFIG_KVM)
|
||||
mm->context.allow_cow_sharing = 1;
|
||||
mm->context.allow_gmap_hpage_1m = 0;
|
||||
#endif
|
||||
switch (mm->context.asce_limit) {
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -78,7 +78,6 @@ static inline void copy_page(void *to, void *from)
|
|||
#ifdef STRICT_MM_TYPECHECKS
|
||||
|
||||
typedef struct { unsigned long pgprot; } pgprot_t;
|
||||
typedef struct { unsigned long pgste; } pgste_t;
|
||||
typedef struct { unsigned long pte; } pte_t;
|
||||
typedef struct { unsigned long pmd; } pmd_t;
|
||||
typedef struct { unsigned long pud; } pud_t;
|
||||
|
|
@ -94,7 +93,6 @@ static __always_inline unsigned long name ## _val(name ## _t name) \
|
|||
#else /* STRICT_MM_TYPECHECKS */
|
||||
|
||||
typedef unsigned long pgprot_t;
|
||||
typedef unsigned long pgste_t;
|
||||
typedef unsigned long pte_t;
|
||||
typedef unsigned long pmd_t;
|
||||
typedef unsigned long pud_t;
|
||||
|
|
@ -110,7 +108,6 @@ static __always_inline unsigned long name ## _val(name ## _t name) \
|
|||
#endif /* STRICT_MM_TYPECHECKS */
|
||||
|
||||
DEFINE_PGVAL_FUNC(pgprot)
|
||||
DEFINE_PGVAL_FUNC(pgste)
|
||||
DEFINE_PGVAL_FUNC(pte)
|
||||
DEFINE_PGVAL_FUNC(pmd)
|
||||
DEFINE_PGVAL_FUNC(pud)
|
||||
|
|
@ -120,7 +117,6 @@ DEFINE_PGVAL_FUNC(pgd)
|
|||
typedef pte_t *pgtable_t;
|
||||
|
||||
#define __pgprot(x) ((pgprot_t) { (x) } )
|
||||
#define __pgste(x) ((pgste_t) { (x) } )
|
||||
#define __pte(x) ((pte_t) { (x) } )
|
||||
#define __pmd(x) ((pmd_t) { (x) } )
|
||||
#define __pud(x) ((pud_t) { (x) } )
|
||||
|
|
|
|||
|
|
@ -27,10 +27,6 @@ unsigned long *page_table_alloc_noprof(struct mm_struct *);
|
|||
#define page_table_alloc(...) alloc_hooks(page_table_alloc_noprof(__VA_ARGS__))
|
||||
void page_table_free(struct mm_struct *, unsigned long *);
|
||||
|
||||
struct ptdesc *page_table_alloc_pgste_noprof(struct mm_struct *mm);
|
||||
#define page_table_alloc_pgste(...) alloc_hooks(page_table_alloc_pgste_noprof(__VA_ARGS__))
|
||||
void page_table_free_pgste(struct ptdesc *ptdesc);
|
||||
|
||||
static inline void crst_table_init(unsigned long *crst, unsigned long entry)
|
||||
{
|
||||
memset64((u64 *)crst, entry, _CRST_ENTRIES);
|
||||
|
|
|
|||
|
|
@ -413,28 +413,6 @@ void setup_protection_map(void);
|
|||
* SW-bits: y young, d dirty, r read, w write
|
||||
*/
|
||||
|
||||
/* Page status table bits for virtualization */
|
||||
#define PGSTE_ACC_BITS 0xf000000000000000UL
|
||||
#define PGSTE_FP_BIT 0x0800000000000000UL
|
||||
#define PGSTE_PCL_BIT 0x0080000000000000UL
|
||||
#define PGSTE_HR_BIT 0x0040000000000000UL
|
||||
#define PGSTE_HC_BIT 0x0020000000000000UL
|
||||
#define PGSTE_GR_BIT 0x0004000000000000UL
|
||||
#define PGSTE_GC_BIT 0x0002000000000000UL
|
||||
#define PGSTE_ST2_MASK 0x0000ffff00000000UL
|
||||
#define PGSTE_UC_BIT 0x0000000000008000UL /* user dirty (migration) */
|
||||
#define PGSTE_IN_BIT 0x0000000000004000UL /* IPTE notify bit */
|
||||
#define PGSTE_VSIE_BIT 0x0000000000002000UL /* ref'd in a shadow table */
|
||||
|
||||
/* Guest Page State used for virtualization */
|
||||
#define _PGSTE_GPS_ZERO 0x0000000080000000UL
|
||||
#define _PGSTE_GPS_NODAT 0x0000000040000000UL
|
||||
#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL
|
||||
#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL
|
||||
#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL
|
||||
#define _PGSTE_GPS_USAGE_POT_VOLATILE 0x0000000002000000UL
|
||||
#define _PGSTE_GPS_USAGE_VOLATILE _PGSTE_GPS_USAGE_MASK
|
||||
|
||||
/*
|
||||
* A user page table pointer has the space-switch-event bit, the
|
||||
* private-space-control bit and the storage-alteration-event-control
|
||||
|
|
@ -566,34 +544,15 @@ static inline bool mm_pmd_folded(struct mm_struct *mm)
|
|||
}
|
||||
#define mm_pmd_folded(mm) mm_pmd_folded(mm)
|
||||
|
||||
static inline int mm_has_pgste(struct mm_struct *mm)
|
||||
{
|
||||
#ifdef CONFIG_PGSTE
|
||||
if (unlikely(mm->context.has_pgste))
|
||||
return 1;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int mm_is_protected(struct mm_struct *mm)
|
||||
{
|
||||
#ifdef CONFIG_PGSTE
|
||||
#if IS_ENABLED(CONFIG_KVM)
|
||||
if (unlikely(atomic_read(&mm->context.protected_count)))
|
||||
return 1;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline pgste_t clear_pgste_bit(pgste_t pgste, unsigned long mask)
|
||||
{
|
||||
return __pgste(pgste_val(pgste) & ~mask);
|
||||
}
|
||||
|
||||
static inline pgste_t set_pgste_bit(pgste_t pgste, unsigned long mask)
|
||||
{
|
||||
return __pgste(pgste_val(pgste) | mask);
|
||||
}
|
||||
|
||||
static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
|
||||
{
|
||||
return __pte(pte_val(pte) & ~pgprot_val(prot));
|
||||
|
|
@ -632,22 +591,13 @@ static inline pud_t set_pud_bit(pud_t pud, pgprot_t prot)
|
|||
#define mm_forbids_zeropage mm_forbids_zeropage
|
||||
static inline int mm_forbids_zeropage(struct mm_struct *mm)
|
||||
{
|
||||
#ifdef CONFIG_PGSTE
|
||||
#if IS_ENABLED(CONFIG_KVM)
|
||||
if (!mm->context.allow_cow_sharing)
|
||||
return 1;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int mm_uses_skeys(struct mm_struct *mm)
|
||||
{
|
||||
#ifdef CONFIG_PGSTE
|
||||
if (mm->context.uses_skeys)
|
||||
return 1;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* cspg() - Compare and Swap and Purge (CSPG)
|
||||
* @ptr: Pointer to the value to be exchanged
|
||||
|
|
@ -1136,6 +1086,13 @@ static inline pte_t pte_mkhuge(pte_t pte)
|
|||
}
|
||||
#endif
|
||||
|
||||
static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
|
||||
{
|
||||
asm volatile("sske %[skey],%[addr],1"
|
||||
: [addr] "+a" (addr) : [skey] "d" (skey));
|
||||
return addr;
|
||||
}
|
||||
|
||||
#define IPTE_GLOBAL 0
|
||||
#define IPTE_LOCAL 1
|
||||
|
||||
|
|
@ -1232,7 +1189,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
|
|||
res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
|
||||
/* At this point the reference through the mapping is still present */
|
||||
if (mm_is_protected(mm) && pte_present(res))
|
||||
uv_convert_from_secure_pte(res);
|
||||
WARN_ON_ONCE(uv_convert_from_secure_pte(res));
|
||||
return res;
|
||||
}
|
||||
|
||||
|
|
@ -1250,7 +1207,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
|
|||
res = ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
|
||||
/* At this point the reference through the mapping is still present */
|
||||
if (mm_is_protected(vma->vm_mm) && pte_present(res))
|
||||
uv_convert_from_secure_pte(res);
|
||||
WARN_ON_ONCE(uv_convert_from_secure_pte(res));
|
||||
return res;
|
||||
}
|
||||
|
||||
|
|
@ -1287,9 +1244,10 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
|
|||
/*
|
||||
* If something went wrong and the page could not be destroyed, or
|
||||
* if this is not a mm teardown, the slower export is used as
|
||||
* fallback instead.
|
||||
* fallback instead. If even that fails, print a warning and leak
|
||||
* the page, to avoid crashing the whole system.
|
||||
*/
|
||||
uv_convert_from_secure_pte(res);
|
||||
WARN_ON_ONCE(uv_convert_from_secure_pte(res));
|
||||
return res;
|
||||
}
|
||||
|
||||
|
|
@ -1348,50 +1306,13 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
|
|||
{
|
||||
if (pte_same(*ptep, entry))
|
||||
return 0;
|
||||
if (cpu_has_rdp() && !mm_has_pgste(vma->vm_mm) && pte_allow_rdp(*ptep, entry))
|
||||
if (cpu_has_rdp() && pte_allow_rdp(*ptep, entry))
|
||||
ptep_reset_dat_prot(vma->vm_mm, addr, ptep, entry);
|
||||
else
|
||||
ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Additional functions to handle KVM guest page tables
|
||||
*/
|
||||
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, pte_t entry);
|
||||
void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
|
||||
void ptep_notify(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, unsigned long bits);
|
||||
int ptep_force_prot(struct mm_struct *mm, unsigned long gaddr,
|
||||
pte_t *ptep, int prot, unsigned long bit);
|
||||
void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep , int reset);
|
||||
void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
|
||||
int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
|
||||
pte_t *sptep, pte_t *tptep, pte_t pte);
|
||||
void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep);
|
||||
|
||||
bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long address,
|
||||
pte_t *ptep);
|
||||
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
||||
unsigned char key, bool nq);
|
||||
int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
||||
unsigned char key, unsigned char *oldkey,
|
||||
bool nq, bool mr, bool mc);
|
||||
int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr);
|
||||
int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
||||
unsigned char *key);
|
||||
|
||||
int set_pgste_bits(struct mm_struct *mm, unsigned long addr,
|
||||
unsigned long bits, unsigned long value);
|
||||
int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep);
|
||||
int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
|
||||
unsigned long *oldpte, unsigned long *oldpgste);
|
||||
void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr);
|
||||
void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr);
|
||||
void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr);
|
||||
|
||||
#define pgprot_writecombine pgprot_writecombine
|
||||
pgprot_t pgprot_writecombine(pgprot_t prot);
|
||||
|
||||
|
|
@ -1406,23 +1327,12 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
|
|||
{
|
||||
if (pte_present(entry))
|
||||
entry = clear_pte_bit(entry, __pgprot(_PAGE_UNUSED));
|
||||
if (mm_has_pgste(mm)) {
|
||||
for (;;) {
|
||||
ptep_set_pte_at(mm, addr, ptep, entry);
|
||||
if (--nr == 0)
|
||||
break;
|
||||
ptep++;
|
||||
entry = __pte(pte_val(entry) + PAGE_SIZE);
|
||||
addr += PAGE_SIZE;
|
||||
}
|
||||
} else {
|
||||
for (;;) {
|
||||
set_pte(ptep, entry);
|
||||
if (--nr == 0)
|
||||
break;
|
||||
ptep++;
|
||||
entry = __pte(pte_val(entry) + PAGE_SIZE);
|
||||
}
|
||||
for (;;) {
|
||||
set_pte(ptep, entry);
|
||||
if (--nr == 0)
|
||||
break;
|
||||
ptep++;
|
||||
entry = __pte(pte_val(entry) + PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
#define set_ptes set_ptes
|
||||
|
|
@ -2015,9 +1925,6 @@ extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t p
|
|||
extern int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot);
|
||||
extern void vmem_unmap_4k_page(unsigned long addr);
|
||||
extern pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc);
|
||||
extern int s390_enable_sie(void);
|
||||
extern int s390_enable_skey(void);
|
||||
extern void s390_reset_cmma(struct mm_struct *mm);
|
||||
|
||||
/* s390 has a private copy of get unmapped area to deal with cache synonyms */
|
||||
#define HAVE_ARCH_UNMAPPED_AREA
|
||||
|
|
@ -2026,40 +1933,4 @@ extern void s390_reset_cmma(struct mm_struct *mm);
|
|||
#define pmd_pgtable(pmd) \
|
||||
((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE))
|
||||
|
||||
static inline unsigned long gmap_pgste_get_pgt_addr(unsigned long *pgt)
|
||||
{
|
||||
unsigned long *pgstes, res;
|
||||
|
||||
pgstes = pgt + _PAGE_ENTRIES;
|
||||
|
||||
res = (pgstes[0] & PGSTE_ST2_MASK) << 16;
|
||||
res |= pgstes[1] & PGSTE_ST2_MASK;
|
||||
res |= (pgstes[2] & PGSTE_ST2_MASK) >> 16;
|
||||
res |= (pgstes[3] & PGSTE_ST2_MASK) >> 32;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static inline pgste_t pgste_get_lock(pte_t *ptep)
|
||||
{
|
||||
unsigned long value = 0;
|
||||
#ifdef CONFIG_PGSTE
|
||||
unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE);
|
||||
|
||||
do {
|
||||
value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr);
|
||||
} while (value & PGSTE_PCL_BIT);
|
||||
value |= PGSTE_PCL_BIT;
|
||||
#endif
|
||||
return __pgste(value);
|
||||
}
|
||||
|
||||
static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
|
||||
{
|
||||
#ifdef CONFIG_PGSTE
|
||||
barrier();
|
||||
WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* _S390_PAGE_H */
|
||||
|
|
|
|||
|
|
@ -36,7 +36,6 @@ static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb,
|
|||
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm-generic/tlb.h>
|
||||
#include <asm/gmap.h>
|
||||
|
||||
/*
|
||||
* Release the page cache reference for a pte removed by
|
||||
|
|
@ -85,8 +84,6 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
|
|||
tlb->mm->context.flush_mm = 1;
|
||||
tlb->freed_tables = 1;
|
||||
tlb->cleared_pmds = 1;
|
||||
if (mm_has_pgste(tlb->mm))
|
||||
gmap_unlink(tlb->mm, (unsigned long *)pte, address);
|
||||
tlb_remove_ptdesc(tlb, virt_to_ptdesc(pte));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -471,65 +471,15 @@ do { \
|
|||
#define arch_get_kernel_nofault __mvc_kernel_nofault
|
||||
#define arch_put_kernel_nofault __mvc_kernel_nofault
|
||||
|
||||
void __cmpxchg_user_key_called_with_bad_pointer(void);
|
||||
|
||||
int __cmpxchg_user_key1(unsigned long address, unsigned char *uval,
|
||||
unsigned char old, unsigned char new, unsigned long key);
|
||||
int __cmpxchg_user_key2(unsigned long address, unsigned short *uval,
|
||||
unsigned short old, unsigned short new, unsigned long key);
|
||||
int __cmpxchg_user_key4(unsigned long address, unsigned int *uval,
|
||||
unsigned int old, unsigned int new, unsigned long key);
|
||||
int __cmpxchg_user_key8(unsigned long address, unsigned long *uval,
|
||||
unsigned long old, unsigned long new, unsigned long key);
|
||||
int __cmpxchg_user_key16(unsigned long address, __uint128_t *uval,
|
||||
__uint128_t old, __uint128_t new, unsigned long key);
|
||||
|
||||
static __always_inline int _cmpxchg_user_key(unsigned long address, void *uval,
|
||||
__uint128_t old, __uint128_t new,
|
||||
unsigned long key, int size)
|
||||
{
|
||||
switch (size) {
|
||||
case 1: return __cmpxchg_user_key1(address, uval, old, new, key);
|
||||
case 2: return __cmpxchg_user_key2(address, uval, old, new, key);
|
||||
case 4: return __cmpxchg_user_key4(address, uval, old, new, key);
|
||||
case 8: return __cmpxchg_user_key8(address, uval, old, new, key);
|
||||
case 16: return __cmpxchg_user_key16(address, uval, old, new, key);
|
||||
default: __cmpxchg_user_key_called_with_bad_pointer();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* cmpxchg_user_key() - cmpxchg with user space target, honoring storage keys
|
||||
* @ptr: User space address of value to compare to @old and exchange with
|
||||
* @new. Must be aligned to sizeof(*@ptr).
|
||||
* @uval: Address where the old value of *@ptr is written to.
|
||||
* @old: Old value. Compared to the content pointed to by @ptr in order to
|
||||
* determine if the exchange occurs. The old value read from *@ptr is
|
||||
* written to *@uval.
|
||||
* @new: New value to place at *@ptr.
|
||||
* @key: Access key to use for checking storage key protection.
|
||||
*
|
||||
* Perform a cmpxchg on a user space target, honoring storage key protection.
|
||||
* @key alone determines how key checking is performed, neither
|
||||
* storage-protection-override nor fetch-protection-override apply.
|
||||
* The caller must compare *@uval and @old to determine if values have been
|
||||
* exchanged. In case of an exception *@uval is set to zero.
|
||||
*
|
||||
* Return: 0: cmpxchg executed
|
||||
* -EFAULT: an exception happened when trying to access *@ptr
|
||||
* -EAGAIN: maxed out number of retries (byte and short only)
|
||||
*/
|
||||
#define cmpxchg_user_key(ptr, uval, old, new, key) \
|
||||
({ \
|
||||
__typeof__(ptr) __ptr = (ptr); \
|
||||
__typeof__(uval) __uval = (uval); \
|
||||
\
|
||||
BUILD_BUG_ON(sizeof(*(__ptr)) != sizeof(*(__uval))); \
|
||||
might_fault(); \
|
||||
__chk_user_ptr(__ptr); \
|
||||
_cmpxchg_user_key((unsigned long)(__ptr), (void *)(__uval), \
|
||||
(old), (new), (key), sizeof(*(__ptr))); \
|
||||
})
|
||||
int __cmpxchg_key1(void *address, unsigned char *uval, unsigned char old,
|
||||
unsigned char new, unsigned long key);
|
||||
int __cmpxchg_key2(void *address, unsigned short *uval, unsigned short old,
|
||||
unsigned short new, unsigned long key);
|
||||
int __cmpxchg_key4(void *address, unsigned int *uval, unsigned int old,
|
||||
unsigned int new, unsigned long key);
|
||||
int __cmpxchg_key8(void *address, unsigned long *uval, unsigned long old,
|
||||
unsigned long new, unsigned long key);
|
||||
int __cmpxchg_key16(void *address, __uint128_t *uval, __uint128_t old,
|
||||
__uint128_t new, unsigned long key);
|
||||
|
||||
#endif /* __S390_UACCESS_H */
|
||||
|
|
|
|||
|
|
@ -631,7 +631,8 @@ int uv_pin_shared(unsigned long paddr);
|
|||
int uv_destroy_folio(struct folio *folio);
|
||||
int uv_destroy_pte(pte_t pte);
|
||||
int uv_convert_from_secure_pte(pte_t pte);
|
||||
int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb);
|
||||
int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio);
|
||||
int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb);
|
||||
int uv_convert_from_secure(unsigned long paddr);
|
||||
int uv_convert_from_secure_folio(struct folio *folio);
|
||||
|
||||
|
|
|
|||
|
|
@ -134,14 +134,15 @@ static int uv_destroy(unsigned long paddr)
|
|||
*/
|
||||
int uv_destroy_folio(struct folio *folio)
|
||||
{
|
||||
unsigned long i;
|
||||
int rc;
|
||||
|
||||
/* Large folios cannot be secure */
|
||||
if (unlikely(folio_test_large(folio)))
|
||||
return 0;
|
||||
|
||||
folio_get(folio);
|
||||
rc = uv_destroy(folio_to_phys(folio));
|
||||
for (i = 0; i < (1 << folio_order(folio)); i++) {
|
||||
rc = uv_destroy(folio_to_phys(folio) + i * PAGE_SIZE);
|
||||
if (rc)
|
||||
break;
|
||||
}
|
||||
if (!rc)
|
||||
clear_bit(PG_arch_1, &folio->flags.f);
|
||||
folio_put(folio);
|
||||
|
|
@ -183,14 +184,15 @@ EXPORT_SYMBOL_GPL(uv_convert_from_secure);
|
|||
*/
|
||||
int uv_convert_from_secure_folio(struct folio *folio)
|
||||
{
|
||||
unsigned long i;
|
||||
int rc;
|
||||
|
||||
/* Large folios cannot be secure */
|
||||
if (unlikely(folio_test_large(folio)))
|
||||
return 0;
|
||||
|
||||
folio_get(folio);
|
||||
rc = uv_convert_from_secure(folio_to_phys(folio));
|
||||
for (i = 0; i < (1 << folio_order(folio)); i++) {
|
||||
rc = uv_convert_from_secure(folio_to_phys(folio) + i * PAGE_SIZE);
|
||||
if (rc)
|
||||
break;
|
||||
}
|
||||
if (!rc)
|
||||
clear_bit(PG_arch_1, &folio->flags.f);
|
||||
folio_put(folio);
|
||||
|
|
@ -207,39 +209,6 @@ int uv_convert_from_secure_pte(pte_t pte)
|
|||
return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte)));
|
||||
}
|
||||
|
||||
/**
|
||||
* should_export_before_import - Determine whether an export is needed
|
||||
* before an import-like operation
|
||||
* @uvcb: the Ultravisor control block of the UVC to be performed
|
||||
* @mm: the mm of the process
|
||||
*
|
||||
* Returns whether an export is needed before every import-like operation.
|
||||
* This is needed for shared pages, which don't trigger a secure storage
|
||||
* exception when accessed from a different guest.
|
||||
*
|
||||
* Although considered as one, the Unpin Page UVC is not an actual import,
|
||||
* so it is not affected.
|
||||
*
|
||||
* No export is needed also when there is only one protected VM, because the
|
||||
* page cannot belong to the wrong VM in that case (there is no "other VM"
|
||||
* it can belong to).
|
||||
*
|
||||
* Return: true if an export is needed before every import, otherwise false.
|
||||
*/
|
||||
static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
|
||||
{
|
||||
/*
|
||||
* The misc feature indicates, among other things, that importing a
|
||||
* shared page from a different protected VM will automatically also
|
||||
* transfer its ownership.
|
||||
*/
|
||||
if (uv_has_feature(BIT_UV_FEAT_MISC))
|
||||
return false;
|
||||
if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
|
||||
return false;
|
||||
return atomic_read(&mm->context.protected_count) > 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate the expected ref_count for a folio that would otherwise have no
|
||||
* further pins. This was cribbed from similar functions in other places in
|
||||
|
|
@ -279,7 +248,7 @@ static int expected_folio_refs(struct folio *folio)
|
|||
* (it's the same logic as split_folio()), and the folio must be
|
||||
* locked.
|
||||
*/
|
||||
static int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
|
||||
int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
|
||||
{
|
||||
int expected, cc = 0;
|
||||
|
||||
|
|
@ -309,20 +278,7 @@ static int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
|
|||
return -EAGAIN;
|
||||
return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
|
||||
}
|
||||
|
||||
static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (!folio_trylock(folio))
|
||||
return -EAGAIN;
|
||||
if (should_export_before_import(uvcb, mm))
|
||||
uv_convert_from_secure(folio_to_phys(folio));
|
||||
rc = __make_folio_secure(folio, uvcb);
|
||||
folio_unlock(folio);
|
||||
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL(__make_folio_secure);
|
||||
|
||||
/**
|
||||
* s390_wiggle_split_folio() - try to drain extra references to a folio and
|
||||
|
|
@ -337,7 +293,7 @@ static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct u
|
|||
* but another attempt can be made;
|
||||
* -EINVAL in case of other folio splitting errors. See split_folio().
|
||||
*/
|
||||
static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio)
|
||||
int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio)
|
||||
{
|
||||
int rc, tried_splits;
|
||||
|
||||
|
|
@ -409,56 +365,7 @@ static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio)
|
|||
}
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
struct folio_walk fw;
|
||||
struct folio *folio;
|
||||
int rc;
|
||||
|
||||
mmap_read_lock(mm);
|
||||
vma = vma_lookup(mm, hva);
|
||||
if (!vma) {
|
||||
mmap_read_unlock(mm);
|
||||
return -EFAULT;
|
||||
}
|
||||
folio = folio_walk_start(&fw, vma, hva, 0);
|
||||
if (!folio) {
|
||||
mmap_read_unlock(mm);
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
folio_get(folio);
|
||||
/*
|
||||
* Secure pages cannot be huge and userspace should not combine both.
|
||||
* In case userspace does it anyway this will result in an -EFAULT for
|
||||
* the unpack. The guest is thus never reaching secure mode.
|
||||
* If userspace plays dirty tricks and decides to map huge pages at a
|
||||
* later point in time, it will receive a segmentation fault or
|
||||
* KVM_RUN will return -EFAULT.
|
||||
*/
|
||||
if (folio_test_hugetlb(folio))
|
||||
rc = -EFAULT;
|
||||
else if (folio_test_large(folio))
|
||||
rc = -E2BIG;
|
||||
else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID))
|
||||
rc = -ENXIO;
|
||||
else
|
||||
rc = make_folio_secure(mm, folio, uvcb);
|
||||
folio_walk_end(&fw, vma);
|
||||
mmap_read_unlock(mm);
|
||||
|
||||
if (rc == -E2BIG || rc == -EBUSY) {
|
||||
rc = s390_wiggle_split_folio(mm, folio);
|
||||
if (!rc)
|
||||
rc = -EAGAIN;
|
||||
}
|
||||
folio_put(folio);
|
||||
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(make_hva_secure);
|
||||
EXPORT_SYMBOL_GPL(s390_wiggle_split_folio);
|
||||
|
||||
/*
|
||||
* To be called with the folio locked or with an extra reference! This will
|
||||
|
|
@ -470,21 +377,18 @@ int arch_make_folio_accessible(struct folio *folio)
|
|||
{
|
||||
int rc = 0;
|
||||
|
||||
/* Large folios cannot be secure */
|
||||
if (unlikely(folio_test_large(folio)))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* PG_arch_1 is used in 2 places:
|
||||
* 1. for storage keys of hugetlb folios and KVM
|
||||
* 2. As an indication that this small folio might be secure. This can
|
||||
* overindicate, e.g. we set the bit before calling
|
||||
* convert_to_secure.
|
||||
* As secure pages are never large folios, both variants can co-exists.
|
||||
* PG_arch_1 is used as an indication that this small folio might be
|
||||
* secure. This can overindicate, e.g. we set the bit before calling
|
||||
* convert_to_secure.
|
||||
*/
|
||||
if (!test_bit(PG_arch_1, &folio->flags.f))
|
||||
return 0;
|
||||
|
||||
/* Large folios cannot be secure. */
|
||||
if (WARN_ON_ONCE(folio_test_large(folio)))
|
||||
return -EFAULT;
|
||||
|
||||
rc = uv_pin_shared(folio_to_phys(folio));
|
||||
if (!rc) {
|
||||
clear_bit(PG_arch_1, &folio->flags.f);
|
||||
|
|
|
|||
|
|
@ -30,6 +30,8 @@ config KVM
|
|||
select KVM_VFIO
|
||||
select MMU_NOTIFIER
|
||||
select VIRT_XFER_TO_GUEST_WORK
|
||||
select KVM_GENERIC_MMU_NOTIFIER
|
||||
select KVM_MMU_LOCKLESS_AGING
|
||||
help
|
||||
Support hosting paravirtualized guest machines using the SIE
|
||||
virtualization capability on the mainframe. This should work
|
||||
|
|
|
|||
|
|
@ -8,7 +8,8 @@ include $(srctree)/virt/kvm/Makefile.kvm
|
|||
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
|
||||
|
||||
kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
|
||||
kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap-vsie.o
|
||||
kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o
|
||||
kvm-y += dat.o gmap.o faultin.o
|
||||
|
||||
kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
|
||||
obj-$(CONFIG_KVM) += kvm.o
|
||||
|
|
|
|||
1391
arch/s390/kvm/dat.c
Normal file
1391
arch/s390/kvm/dat.c
Normal file
File diff suppressed because it is too large
Load diff
970
arch/s390/kvm/dat.h
Normal file
970
arch/s390/kvm/dat.h
Normal file
|
|
@ -0,0 +1,970 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* KVM guest address space mapping code
|
||||
*
|
||||
* Copyright IBM Corp. 2024, 2025
|
||||
* Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
|
||||
*/
|
||||
|
||||
#ifndef __KVM_S390_DAT_H
|
||||
#define __KVM_S390_DAT_H
|
||||
|
||||
#include <linux/radix-tree.h>
|
||||
#include <linux/refcount.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/kvm_types.h>
|
||||
#include <linux/pgalloc.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/dat-bits.h>
|
||||
|
||||
/*
|
||||
* Base address and length must be sent at the start of each block, therefore
|
||||
* it's cheaper to send some clean data, as long as it's less than the size of
|
||||
* two longs.
|
||||
*/
|
||||
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
|
||||
/* For consistency */
|
||||
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
|
||||
|
||||
#define _ASCE(x) ((union asce) { .val = (x), })
|
||||
#define NULL_ASCE _ASCE(0)
|
||||
|
||||
enum {
|
||||
_DAT_TOKEN_NONE = 0,
|
||||
_DAT_TOKEN_PIC,
|
||||
};
|
||||
|
||||
#define _CRSTE_TOK(l, t, p) ((union crste) { \
|
||||
.tok.i = 1, \
|
||||
.tok.tt = (l), \
|
||||
.tok.type = (t), \
|
||||
.tok.par = (p) \
|
||||
})
|
||||
#define _CRSTE_PIC(l, p) _CRSTE_TOK(l, _DAT_TOKEN_PIC, p)
|
||||
|
||||
#define _CRSTE_HOLE(l) _CRSTE_PIC(l, PGM_ADDRESSING)
|
||||
#define _CRSTE_EMPTY(l) _CRSTE_TOK(l, _DAT_TOKEN_NONE, 0)
|
||||
|
||||
#define _PMD_EMPTY _CRSTE_EMPTY(TABLE_TYPE_SEGMENT)
|
||||
|
||||
#define _PTE_TOK(t, p) ((union pte) { .tok.i = 1, .tok.type = (t), .tok.par = (p) })
|
||||
#define _PTE_EMPTY _PTE_TOK(_DAT_TOKEN_NONE, 0)
|
||||
|
||||
/* This fake table type is used for page table walks (both for normal page tables and vSIE) */
|
||||
#define TABLE_TYPE_PAGE_TABLE -1
|
||||
|
||||
enum dat_walk_flags {
|
||||
DAT_WALK_USES_SKEYS = 0x40,
|
||||
DAT_WALK_CONTINUE = 0x20,
|
||||
DAT_WALK_IGN_HOLES = 0x10,
|
||||
DAT_WALK_SPLIT = 0x08,
|
||||
DAT_WALK_ALLOC = 0x04,
|
||||
DAT_WALK_ANY = 0x02,
|
||||
DAT_WALK_LEAF = 0x01,
|
||||
DAT_WALK_DEFAULT = 0
|
||||
};
|
||||
|
||||
#define DAT_WALK_SPLIT_ALLOC (DAT_WALK_SPLIT | DAT_WALK_ALLOC)
|
||||
#define DAT_WALK_ALLOC_CONTINUE (DAT_WALK_CONTINUE | DAT_WALK_ALLOC)
|
||||
#define DAT_WALK_LEAF_ALLOC (DAT_WALK_LEAF | DAT_WALK_ALLOC)
|
||||
|
||||
union pte {
|
||||
unsigned long val;
|
||||
union page_table_entry h;
|
||||
struct {
|
||||
unsigned long :56; /* Hardware bits */
|
||||
unsigned long u : 1; /* Page unused */
|
||||
unsigned long s : 1; /* Special */
|
||||
unsigned long w : 1; /* Writable */
|
||||
unsigned long r : 1; /* Readable */
|
||||
unsigned long d : 1; /* Dirty */
|
||||
unsigned long y : 1; /* Young */
|
||||
unsigned long sd: 1; /* Soft dirty */
|
||||
unsigned long pr: 1; /* Present */
|
||||
} s;
|
||||
struct {
|
||||
unsigned char hwbytes[7];
|
||||
unsigned char swbyte;
|
||||
};
|
||||
union {
|
||||
struct {
|
||||
unsigned long type :16; /* Token type */
|
||||
unsigned long par :16; /* Token parameter */
|
||||
unsigned long :20;
|
||||
unsigned long : 1; /* Must be 0 */
|
||||
unsigned long i : 1; /* Must be 1 */
|
||||
unsigned long : 2;
|
||||
unsigned long : 7;
|
||||
unsigned long pr : 1; /* Must be 0 */
|
||||
};
|
||||
struct {
|
||||
unsigned long token:32; /* Token and parameter */
|
||||
unsigned long :32;
|
||||
};
|
||||
} tok;
|
||||
};
|
||||
|
||||
/* Soft dirty, needed as macro for atomic operations on ptes */
|
||||
#define _PAGE_SD 0x002
|
||||
|
||||
/* Needed as macro to perform atomic operations */
|
||||
#define PGSTE_PCL_BIT 0x0080000000000000UL /* PCL lock, HW bit */
|
||||
#define PGSTE_CMMA_D_BIT 0x0000000000008000UL /* CMMA dirty soft-bit */
|
||||
|
||||
enum pgste_gps_usage {
|
||||
PGSTE_GPS_USAGE_STABLE = 0,
|
||||
PGSTE_GPS_USAGE_UNUSED,
|
||||
PGSTE_GPS_USAGE_POT_VOLATILE,
|
||||
PGSTE_GPS_USAGE_VOLATILE,
|
||||
};
|
||||
|
||||
union pgste {
|
||||
unsigned long val;
|
||||
struct {
|
||||
unsigned long acc : 4;
|
||||
unsigned long fp : 1;
|
||||
unsigned long : 3;
|
||||
unsigned long pcl : 1;
|
||||
unsigned long hr : 1;
|
||||
unsigned long hc : 1;
|
||||
unsigned long : 2;
|
||||
unsigned long gr : 1;
|
||||
unsigned long gc : 1;
|
||||
unsigned long : 1;
|
||||
unsigned long :16; /* val16 */
|
||||
unsigned long zero : 1;
|
||||
unsigned long nodat : 1;
|
||||
unsigned long : 4;
|
||||
unsigned long usage : 2;
|
||||
unsigned long : 8;
|
||||
unsigned long cmma_d : 1; /* Dirty flag for CMMA bits */
|
||||
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
|
||||
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
|
||||
unsigned long : 5;
|
||||
unsigned long : 8;
|
||||
};
|
||||
struct {
|
||||
unsigned short hwbytes0;
|
||||
unsigned short val16; /* Used to store chunked values, see dat_{s,g}et_ptval() */
|
||||
unsigned short hwbytes4;
|
||||
unsigned char flags; /* Maps to the software bits */
|
||||
unsigned char hwbyte7;
|
||||
} __packed;
|
||||
};
|
||||
|
||||
union pmd {
|
||||
unsigned long val;
|
||||
union segment_table_entry h;
|
||||
struct {
|
||||
struct {
|
||||
unsigned long :44; /* HW */
|
||||
unsigned long : 3; /* Unused */
|
||||
unsigned long : 1; /* HW */
|
||||
unsigned long w : 1; /* Writable soft-bit */
|
||||
unsigned long r : 1; /* Readable soft-bit */
|
||||
unsigned long d : 1; /* Dirty */
|
||||
unsigned long y : 1; /* Young */
|
||||
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
|
||||
unsigned long : 3; /* HW */
|
||||
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
|
||||
unsigned long : 1; /* Unused */
|
||||
unsigned long : 4; /* HW */
|
||||
unsigned long sd : 1; /* Soft-Dirty */
|
||||
unsigned long pr : 1; /* Present */
|
||||
} fc1;
|
||||
} s;
|
||||
};
|
||||
|
||||
union pud {
|
||||
unsigned long val;
|
||||
union region3_table_entry h;
|
||||
struct {
|
||||
struct {
|
||||
unsigned long :33; /* HW */
|
||||
unsigned long :14; /* Unused */
|
||||
unsigned long : 1; /* HW */
|
||||
unsigned long w : 1; /* Writable soft-bit */
|
||||
unsigned long r : 1; /* Readable soft-bit */
|
||||
unsigned long d : 1; /* Dirty */
|
||||
unsigned long y : 1; /* Young */
|
||||
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
|
||||
unsigned long : 3; /* HW */
|
||||
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
|
||||
unsigned long : 1; /* Unused */
|
||||
unsigned long : 4; /* HW */
|
||||
unsigned long sd : 1; /* Soft-Dirty */
|
||||
unsigned long pr : 1; /* Present */
|
||||
} fc1;
|
||||
} s;
|
||||
};
|
||||
|
||||
union p4d {
|
||||
unsigned long val;
|
||||
union region2_table_entry h;
|
||||
};
|
||||
|
||||
union pgd {
|
||||
unsigned long val;
|
||||
union region1_table_entry h;
|
||||
};
|
||||
|
||||
union crste {
|
||||
unsigned long val;
|
||||
union {
|
||||
struct {
|
||||
unsigned long :52;
|
||||
unsigned long : 1;
|
||||
unsigned long fc: 1;
|
||||
unsigned long p : 1;
|
||||
unsigned long : 1;
|
||||
unsigned long : 2;
|
||||
unsigned long i : 1;
|
||||
unsigned long : 1;
|
||||
unsigned long tt: 2;
|
||||
unsigned long : 2;
|
||||
};
|
||||
struct {
|
||||
unsigned long to:52;
|
||||
unsigned long : 1;
|
||||
unsigned long fc: 1;
|
||||
unsigned long p : 1;
|
||||
unsigned long : 1;
|
||||
unsigned long tf: 2;
|
||||
unsigned long i : 1;
|
||||
unsigned long : 1;
|
||||
unsigned long tt: 2;
|
||||
unsigned long tl: 2;
|
||||
} fc0;
|
||||
struct {
|
||||
unsigned long :47;
|
||||
unsigned long av : 1; /* ACCF-Validity Control */
|
||||
unsigned long acc: 4; /* Access-Control Bits */
|
||||
unsigned long f : 1; /* Fetch-Protection Bit */
|
||||
unsigned long fc : 1; /* Format-Control */
|
||||
unsigned long p : 1; /* DAT-Protection Bit */
|
||||
unsigned long iep: 1; /* Instruction-Execution-Protection */
|
||||
unsigned long : 2;
|
||||
unsigned long i : 1; /* Segment-Invalid Bit */
|
||||
unsigned long cs : 1; /* Common-Segment Bit */
|
||||
unsigned long tt : 2; /* Table-Type Bits */
|
||||
unsigned long : 2;
|
||||
} fc1;
|
||||
} h;
|
||||
struct {
|
||||
struct {
|
||||
unsigned long :47;
|
||||
unsigned long : 1; /* HW (should be 0) */
|
||||
unsigned long w : 1; /* Writable */
|
||||
unsigned long r : 1; /* Readable */
|
||||
unsigned long d : 1; /* Dirty */
|
||||
unsigned long y : 1; /* Young */
|
||||
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
|
||||
unsigned long : 3; /* HW */
|
||||
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
|
||||
unsigned long : 1;
|
||||
unsigned long : 4; /* HW */
|
||||
unsigned long sd : 1; /* Soft-Dirty */
|
||||
unsigned long pr : 1; /* Present */
|
||||
} fc1;
|
||||
} s;
|
||||
union {
|
||||
struct {
|
||||
unsigned long type :16; /* Token type */
|
||||
unsigned long par :16; /* Token parameter */
|
||||
unsigned long :26;
|
||||
unsigned long i : 1; /* Must be 1 */
|
||||
unsigned long : 1;
|
||||
unsigned long tt : 2;
|
||||
unsigned long : 1;
|
||||
unsigned long pr : 1; /* Must be 0 */
|
||||
};
|
||||
struct {
|
||||
unsigned long token:32; /* Token and parameter */
|
||||
unsigned long :32;
|
||||
};
|
||||
} tok;
|
||||
union pmd pmd;
|
||||
union pud pud;
|
||||
union p4d p4d;
|
||||
union pgd pgd;
|
||||
};
|
||||
|
||||
union skey {
|
||||
unsigned char skey;
|
||||
struct {
|
||||
unsigned char acc :4;
|
||||
unsigned char fp :1;
|
||||
unsigned char r :1;
|
||||
unsigned char c :1;
|
||||
unsigned char zero:1;
|
||||
};
|
||||
};
|
||||
|
||||
static_assert(sizeof(union pgste) == sizeof(unsigned long));
|
||||
static_assert(sizeof(union pte) == sizeof(unsigned long));
|
||||
static_assert(sizeof(union pmd) == sizeof(unsigned long));
|
||||
static_assert(sizeof(union pud) == sizeof(unsigned long));
|
||||
static_assert(sizeof(union p4d) == sizeof(unsigned long));
|
||||
static_assert(sizeof(union pgd) == sizeof(unsigned long));
|
||||
static_assert(sizeof(union crste) == sizeof(unsigned long));
|
||||
static_assert(sizeof(union skey) == sizeof(char));
|
||||
|
||||
struct segment_table {
|
||||
union pmd pmds[_CRST_ENTRIES];
|
||||
};
|
||||
|
||||
struct region3_table {
|
||||
union pud puds[_CRST_ENTRIES];
|
||||
};
|
||||
|
||||
struct region2_table {
|
||||
union p4d p4ds[_CRST_ENTRIES];
|
||||
};
|
||||
|
||||
struct region1_table {
|
||||
union pgd pgds[_CRST_ENTRIES];
|
||||
};
|
||||
|
||||
struct crst_table {
|
||||
union {
|
||||
union crste crstes[_CRST_ENTRIES];
|
||||
struct segment_table segment;
|
||||
struct region3_table region3;
|
||||
struct region2_table region2;
|
||||
struct region1_table region1;
|
||||
};
|
||||
};
|
||||
|
||||
struct page_table {
|
||||
union pte ptes[_PAGE_ENTRIES];
|
||||
union pgste pgstes[_PAGE_ENTRIES];
|
||||
};
|
||||
|
||||
static_assert(sizeof(struct crst_table) == _CRST_TABLE_SIZE);
|
||||
static_assert(sizeof(struct page_table) == PAGE_SIZE);
|
||||
|
||||
struct dat_walk;
|
||||
|
||||
typedef long (*dat_walk_op)(union crste *crste, gfn_t gfn, gfn_t next, struct dat_walk *w);
|
||||
|
||||
struct dat_walk_ops {
|
||||
union {
|
||||
dat_walk_op crste_ops[4];
|
||||
struct {
|
||||
dat_walk_op pmd_entry;
|
||||
dat_walk_op pud_entry;
|
||||
dat_walk_op p4d_entry;
|
||||
dat_walk_op pgd_entry;
|
||||
};
|
||||
};
|
||||
long (*pte_entry)(union pte *pte, gfn_t gfn, gfn_t next, struct dat_walk *w);
|
||||
};
|
||||
|
||||
struct dat_walk {
|
||||
const struct dat_walk_ops *ops;
|
||||
union crste *last;
|
||||
union pte *last_pte;
|
||||
union asce asce;
|
||||
gfn_t start;
|
||||
gfn_t end;
|
||||
int flags;
|
||||
void *priv;
|
||||
};
|
||||
|
||||
struct ptval_param {
|
||||
unsigned char offset : 6;
|
||||
unsigned char len : 2;
|
||||
};
|
||||
|
||||
/**
|
||||
* _pte() - Useful constructor for union pte
|
||||
* @pfn: the pfn this pte should point to.
|
||||
* @writable: whether the pte should be writable.
|
||||
* @dirty: whether the pte should be dirty.
|
||||
* @special: whether the pte should be marked as special
|
||||
*
|
||||
* The pte is also marked as young and present. If the pte is marked as dirty,
|
||||
* it gets marked as soft-dirty too. If the pte is not dirty, the hardware
|
||||
* protect bit is set (independently of the write softbit); this way proper
|
||||
* dirty tracking can be performed.
|
||||
*
|
||||
* Return: a union pte value.
|
||||
*/
|
||||
static inline union pte _pte(kvm_pfn_t pfn, bool writable, bool dirty, bool special)
|
||||
{
|
||||
union pte res = { .val = PFN_PHYS(pfn) };
|
||||
|
||||
res.h.p = !dirty;
|
||||
res.s.y = 1;
|
||||
res.s.pr = 1;
|
||||
res.s.w = writable;
|
||||
res.s.d = dirty;
|
||||
res.s.sd = dirty;
|
||||
res.s.s = special;
|
||||
return res;
|
||||
}
|
||||
|
||||
static inline union crste _crste_fc0(kvm_pfn_t pfn, int tt)
|
||||
{
|
||||
union crste res = { .val = PFN_PHYS(pfn) };
|
||||
|
||||
res.h.tt = tt;
|
||||
res.h.fc0.tl = _REGION_ENTRY_LENGTH;
|
||||
res.h.fc0.tf = 0;
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* _crste() - Useful constructor for union crste with FC=1
|
||||
* @pfn: the pfn this pte should point to.
|
||||
* @tt: the table type
|
||||
* @writable: whether the pte should be writable.
|
||||
* @dirty: whether the pte should be dirty.
|
||||
*
|
||||
* The crste is also marked as young and present. If the crste is marked as
|
||||
* dirty, it gets marked as soft-dirty too. If the crste is not dirty, the
|
||||
* hardware protect bit is set (independently of the write softbit); this way
|
||||
* proper dirty tracking can be performed.
|
||||
*
|
||||
* Return: a union crste value.
|
||||
*/
|
||||
static inline union crste _crste_fc1(kvm_pfn_t pfn, int tt, bool writable, bool dirty)
|
||||
{
|
||||
union crste res = { .val = PFN_PHYS(pfn) & _SEGMENT_MASK };
|
||||
|
||||
res.h.tt = tt;
|
||||
res.h.p = !dirty;
|
||||
res.h.fc = 1;
|
||||
res.s.fc1.y = 1;
|
||||
res.s.fc1.pr = 1;
|
||||
res.s.fc1.w = writable;
|
||||
res.s.fc1.d = dirty;
|
||||
res.s.fc1.sd = dirty;
|
||||
return res;
|
||||
}
|
||||
|
||||
union essa_state {
|
||||
unsigned char val;
|
||||
struct {
|
||||
unsigned char : 2;
|
||||
unsigned char nodat : 1;
|
||||
unsigned char exception : 1;
|
||||
unsigned char usage : 2;
|
||||
unsigned char content : 2;
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* struct vsie_rmap - reverse mapping for shadow page table entries
|
||||
* @next: pointer to next rmap in the list
|
||||
* @r_gfn: virtual rmap address in the shadow guest address space
|
||||
*/
|
||||
struct vsie_rmap {
|
||||
struct vsie_rmap *next;
|
||||
union {
|
||||
unsigned long val;
|
||||
struct {
|
||||
long level: 8;
|
||||
unsigned long : 4;
|
||||
unsigned long r_gfn:52;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
static_assert(sizeof(struct vsie_rmap) == 2 * sizeof(long));
|
||||
|
||||
#define KVM_S390_MMU_CACHE_N_CRSTS 6
|
||||
#define KVM_S390_MMU_CACHE_N_PTS 2
|
||||
#define KVM_S390_MMU_CACHE_N_RMAPS 16
|
||||
struct kvm_s390_mmu_cache {
|
||||
void *crsts[KVM_S390_MMU_CACHE_N_CRSTS];
|
||||
void *pts[KVM_S390_MMU_CACHE_N_PTS];
|
||||
void *rmaps[KVM_S390_MMU_CACHE_N_RMAPS];
|
||||
short int n_crsts;
|
||||
short int n_pts;
|
||||
short int n_rmaps;
|
||||
};
|
||||
|
||||
struct guest_fault {
|
||||
gfn_t gfn; /* Guest frame */
|
||||
kvm_pfn_t pfn; /* Host PFN */
|
||||
struct page *page; /* Host page */
|
||||
union pte *ptep; /* Used to resolve the fault, or NULL */
|
||||
union crste *crstep; /* Used to resolve the fault, or NULL */
|
||||
bool writable; /* Mapping is writable */
|
||||
bool write_attempt; /* Write access attempted */
|
||||
bool attempt_pfault; /* Attempt a pfault first */
|
||||
bool valid; /* This entry contains valid data */
|
||||
void (*callback)(struct guest_fault *f);
|
||||
void *priv;
|
||||
};
|
||||
|
||||
/*
|
||||
* 0 1 2 3 4 5 6 7
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 0 | | PGT_ADDR |
|
||||
* 8 | VMADDR | |
|
||||
* 16 | |
|
||||
* 24 | |
|
||||
*/
|
||||
#define MKPTVAL(o, l) ((struct ptval_param) { .offset = (o), .len = ((l) + 1) / 2 - 1})
|
||||
#define PTVAL_PGT_ADDR MKPTVAL(4, 8)
|
||||
#define PTVAL_VMADDR MKPTVAL(8, 6)
|
||||
|
||||
union pgste __must_check __dat_ptep_xchg(union pte *ptep, union pgste pgste, union pte new,
|
||||
gfn_t gfn, union asce asce, bool uses_skeys);
|
||||
bool dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new, gfn_t gfn,
|
||||
union asce asce);
|
||||
void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce asce);
|
||||
|
||||
long _dat_walk_gfn_range(gfn_t start, gfn_t end, union asce asce,
|
||||
const struct dat_walk_ops *ops, int flags, void *priv);
|
||||
|
||||
int dat_entry_walk(struct kvm_s390_mmu_cache *mc, gfn_t gfn, union asce asce, int flags,
|
||||
int walk_level, union crste **last, union pte **ptepp);
|
||||
void dat_free_level(struct crst_table *table, bool owns_ptes);
|
||||
struct crst_table *dat_alloc_crst_sleepable(unsigned long init);
|
||||
int dat_set_asce_limit(struct kvm_s390_mmu_cache *mc, union asce *asce, int newtype);
|
||||
int dat_get_storage_key(union asce asce, gfn_t gfn, union skey *skey);
|
||||
int dat_set_storage_key(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t gfn,
|
||||
union skey skey, bool nq);
|
||||
int dat_cond_set_storage_key(struct kvm_s390_mmu_cache *mmc, union asce asce, gfn_t gfn,
|
||||
union skey skey, union skey *oldkey, bool nq, bool mr, bool mc);
|
||||
int dat_reset_reference_bit(union asce asce, gfn_t gfn);
|
||||
long dat_reset_skeys(union asce asce, gfn_t start);
|
||||
|
||||
unsigned long dat_get_ptval(struct page_table *table, struct ptval_param param);
|
||||
void dat_set_ptval(struct page_table *table, struct ptval_param param, unsigned long val);
|
||||
|
||||
int dat_set_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start, gfn_t end,
|
||||
u16 type, u16 param);
|
||||
int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn);
|
||||
bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end);
|
||||
int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level,
|
||||
bool uses_skeys, struct guest_fault *f);
|
||||
|
||||
int dat_perform_essa(union asce asce, gfn_t gfn, int orc, union essa_state *state, bool *dirty);
|
||||
long dat_reset_cmma(union asce asce, gfn_t start_gfn);
|
||||
int dat_peek_cmma(gfn_t start, union asce asce, unsigned int *count, u8 *values);
|
||||
int dat_get_cmma(union asce asce, gfn_t *start, unsigned int *count, u8 *values, atomic64_t *rem);
|
||||
int dat_set_cmma_bits(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t gfn,
|
||||
unsigned long count, unsigned long mask, const uint8_t *bits);
|
||||
|
||||
int kvm_s390_mmu_cache_topup(struct kvm_s390_mmu_cache *mc);
|
||||
|
||||
#define GFP_KVM_S390_MMU_CACHE (GFP_ATOMIC | __GFP_ACCOUNT | __GFP_NOWARN)
|
||||
|
||||
static inline struct page_table *kvm_s390_mmu_cache_alloc_pt(struct kvm_s390_mmu_cache *mc)
|
||||
{
|
||||
if (mc->n_pts)
|
||||
return mc->pts[--mc->n_pts];
|
||||
return (void *)__get_free_page(GFP_KVM_S390_MMU_CACHE);
|
||||
}
|
||||
|
||||
static inline struct crst_table *kvm_s390_mmu_cache_alloc_crst(struct kvm_s390_mmu_cache *mc)
|
||||
{
|
||||
if (mc->n_crsts)
|
||||
return mc->crsts[--mc->n_crsts];
|
||||
return (void *)__get_free_pages(GFP_KVM_S390_MMU_CACHE | __GFP_COMP, CRST_ALLOC_ORDER);
|
||||
}
|
||||
|
||||
static inline struct vsie_rmap *kvm_s390_mmu_cache_alloc_rmap(struct kvm_s390_mmu_cache *mc)
|
||||
{
|
||||
if (mc->n_rmaps)
|
||||
return mc->rmaps[--mc->n_rmaps];
|
||||
return kzalloc(sizeof(struct vsie_rmap), GFP_KVM_S390_MMU_CACHE);
|
||||
}
|
||||
|
||||
static inline struct crst_table *crste_table_start(union crste *crstep)
|
||||
{
|
||||
return (struct crst_table *)ALIGN_DOWN((unsigned long)crstep, _CRST_TABLE_SIZE);
|
||||
}
|
||||
|
||||
static inline struct page_table *pte_table_start(union pte *ptep)
|
||||
{
|
||||
return (struct page_table *)ALIGN_DOWN((unsigned long)ptep, _PAGE_TABLE_SIZE);
|
||||
}
|
||||
|
||||
static inline bool crdte_crste(union crste *crstep, union crste old, union crste new, gfn_t gfn,
|
||||
union asce asce)
|
||||
{
|
||||
unsigned long dtt = 0x10 | new.h.tt << 2;
|
||||
void *table = crste_table_start(crstep);
|
||||
|
||||
return crdte(old.val, new.val, table, dtt, gfn_to_gpa(gfn), asce.val);
|
||||
}
|
||||
|
||||
/**
|
||||
* idte_crste() - invalidate a crste entry using idte
|
||||
* @crstep: pointer to the crste to be invalidated
|
||||
* @gfn: a gfn mapped by the crste
|
||||
* @opt: options for the idte instruction
|
||||
* @asce: the asce
|
||||
* @local: whether the operation is cpu-local
|
||||
*/
|
||||
static __always_inline void idte_crste(union crste *crstep, gfn_t gfn, unsigned long opt,
				       union asce asce, int local)
{
	unsigned long table_origin = __pa(crste_table_start(crstep));
	unsigned long gaddr = gfn_to_gpa(gfn) & HPAGE_MASK;

	/*
	 * @local is passed via an "i" (immediate) constraint, so it must be a
	 * compile-time constant; hence this function is __always_inline.
	 * The branch below is resolved at compile time when @opt is constant.
	 */
	if (__builtin_constant_p(opt) && opt == 0) {
		/* flush without guest asce */
		asm volatile("idte %[table_origin],0,%[gaddr],%[local]"
			     : "+m" (*crstep)
			     : [table_origin] "a" (table_origin), [gaddr] "a" (gaddr),
			       [local] "i" (local)
			     : "cc");
	} else {
		/* flush with guest asce; @opt is or-ed into the address operand */
		asm volatile("idte %[table_origin],%[asce],%[gaddr_opt],%[local]"
			     : "+m" (*crstep)
			     : [table_origin] "a" (table_origin), [gaddr_opt] "a" (gaddr | opt),
			       [asce] "a" (asce.val), [local] "i" (local)
			     : "cc");
	}
}
|
||||
|
||||
static inline void dat_init_pgstes(struct page_table *pt, unsigned long val)
|
||||
{
|
||||
memset64((void *)pt->pgstes, val, PTRS_PER_PTE);
|
||||
}
|
||||
|
||||
static inline void dat_init_page_table(struct page_table *pt, unsigned long ptes,
|
||||
unsigned long pgstes)
|
||||
{
|
||||
memset64((void *)pt->ptes, ptes, PTRS_PER_PTE);
|
||||
dat_init_pgstes(pt, pgstes);
|
||||
}
|
||||
|
||||
static inline gfn_t asce_end(union asce asce)
|
||||
{
|
||||
return 1ULL << ((asce.dt + 1) * 11 + _SEGMENT_SHIFT - PAGE_SHIFT);
|
||||
}
|
||||
|
||||
#define _CRSTE(x) ((union crste) { .val = _Generic((x), \
|
||||
union pgd : (x).val, \
|
||||
union p4d : (x).val, \
|
||||
union pud : (x).val, \
|
||||
union pmd : (x).val, \
|
||||
union crste : (x).val)})
|
||||
|
||||
#define _CRSTEP(x) ((union crste *)_Generic((*(x)), \
|
||||
union pgd : (x), \
|
||||
union p4d : (x), \
|
||||
union pud : (x), \
|
||||
union pmd : (x), \
|
||||
union crste : (x)))
|
||||
|
||||
#define _CRSTP(x) ((struct crst_table *)_Generic((*(x)), \
|
||||
struct crst_table : (x), \
|
||||
struct segment_table : (x), \
|
||||
struct region3_table : (x), \
|
||||
struct region2_table : (x), \
|
||||
struct region1_table : (x)))
|
||||
|
||||
static inline bool asce_contains_gfn(union asce asce, gfn_t gfn)
|
||||
{
|
||||
return gfn < asce_end(asce);
|
||||
}
|
||||
|
||||
/* True if @crste is a segment table entry (tt == TABLE_TYPE_SEGMENT). */
static inline bool is_pmd(union crste crste)
{
	return crste.h.tt == TABLE_TYPE_SEGMENT;
}

/* True if @crste is a region-third table entry (tt == TABLE_TYPE_REGION3). */
static inline bool is_pud(union crste crste)
{
	return crste.h.tt == TABLE_TYPE_REGION3;
}

/* True if @crste is a region-second table entry (tt == TABLE_TYPE_REGION2). */
static inline bool is_p4d(union crste crste)
{
	return crste.h.tt == TABLE_TYPE_REGION2;
}

/* True if @crste is a region-first table entry (tt == TABLE_TYPE_REGION1). */
static inline bool is_pgd(union crste crste)
{
	return crste.h.tt == TABLE_TYPE_REGION1;
}
|
||||
|
||||
static inline phys_addr_t pmd_origin_large(union pmd pmd)
|
||||
{
|
||||
return pmd.val & _SEGMENT_ENTRY_ORIGIN_LARGE;
|
||||
}
|
||||
|
||||
static inline phys_addr_t pud_origin_large(union pud pud)
|
||||
{
|
||||
return pud.val & _REGION3_ENTRY_ORIGIN_LARGE;
|
||||
}
|
||||
|
||||
/**
|
||||
* crste_origin_large() - Return the large frame origin of a large crste
|
||||
* @crste: The crste whose origin is to be returned. Should be either a
|
||||
* region-3 table entry or a segment table entry, in both cases with
|
||||
* FC set to 1 (large pages).
|
||||
*
|
||||
* Return: The origin of the large frame pointed to by @crste, or -1 if the
|
||||
* crste was not large (wrong table type, or FC==0)
|
||||
*/
|
||||
static inline phys_addr_t crste_origin_large(union crste crste)
|
||||
{
|
||||
if (unlikely(!crste.h.fc || crste.h.tt > TABLE_TYPE_REGION3))
|
||||
return -1;
|
||||
if (is_pmd(crste))
|
||||
return pmd_origin_large(crste.pmd);
|
||||
return pud_origin_large(crste.pud);
|
||||
}
|
||||
|
||||
#define crste_origin(x) (_Generic((x), \
|
||||
union pmd : (x).val & _SEGMENT_ENTRY_ORIGIN, \
|
||||
union pud : (x).val & _REGION_ENTRY_ORIGIN, \
|
||||
union p4d : (x).val & _REGION_ENTRY_ORIGIN, \
|
||||
union pgd : (x).val & _REGION_ENTRY_ORIGIN))
|
||||
|
||||
static inline unsigned long pte_origin(union pte pte)
|
||||
{
|
||||
return pte.val & PAGE_MASK;
|
||||
}
|
||||
|
||||
static inline bool pmd_prefix(union pmd pmd)
|
||||
{
|
||||
return pmd.h.fc && pmd.s.fc1.prefix_notif;
|
||||
}
|
||||
|
||||
static inline bool pud_prefix(union pud pud)
|
||||
{
|
||||
return pud.h.fc && pud.s.fc1.prefix_notif;
|
||||
}
|
||||
|
||||
/* True if @crste is a leaf entry: segment or region-3 type with FC set. */
static inline bool crste_leaf(union crste crste)
{
	return (crste.h.tt <= TABLE_TYPE_REGION3) && crste.h.fc;
}

/* True if @crste is a leaf entry with the prefix notification bit set. */
static inline bool crste_prefix(union crste crste)
{
	return crste_leaf(crste) && crste.s.fc1.prefix_notif;
}

/* True if @crste is a leaf entry with the dirty bit set. */
static inline bool crste_dirty(union crste crste)
{
	return crste_leaf(crste) && crste.s.fc1.d;
}
|
||||
|
||||
static inline union pgste *pgste_of(union pte *pte)
|
||||
{
|
||||
return (union pgste *)(pte + _PAGE_ENTRIES);
|
||||
}
|
||||
|
||||
/* True if @pte is invalid, not protected, and carries a non-empty token. */
static inline bool pte_hole(union pte pte)
{
	return pte.h.i && !pte.tok.pr && pte.tok.type != _DAT_TOKEN_NONE;
}

/* True if @crste is invalid, not protected, and carries a non-empty token. */
static inline bool _crste_hole(union crste crste)
{
	return crste.h.i && !crste.tok.pr && crste.tok.type != _DAT_TOKEN_NONE;
}

/* Type-generic wrapper: accepts any of the crste-like unions. */
#define crste_hole(x) _crste_hole(_CRSTE(x))

/* True if @crste is invalid, not protected, and carries no token (empty). */
static inline bool _crste_none(union crste crste)
{
	return crste.h.i && !crste.tok.pr && crste.tok.type == _DAT_TOKEN_NONE;
}

/* Type-generic wrapper: accepts any of the crste-like unions. */
#define crste_none(x) _crste_none(_CRSTE(x))
|
||||
|
||||
static inline phys_addr_t large_pud_to_phys(union pud pud, gfn_t gfn)
|
||||
{
|
||||
return pud_origin_large(pud) | (gfn_to_gpa(gfn) & ~_REGION3_MASK);
|
||||
}
|
||||
|
||||
static inline phys_addr_t large_pmd_to_phys(union pmd pmd, gfn_t gfn)
|
||||
{
|
||||
return pmd_origin_large(pmd) | (gfn_to_gpa(gfn) & ~_SEGMENT_MASK);
|
||||
}
|
||||
|
||||
static inline phys_addr_t large_crste_to_phys(union crste crste, gfn_t gfn)
|
||||
{
|
||||
if (unlikely(!crste.h.fc || crste.h.tt > TABLE_TYPE_REGION3))
|
||||
return -1;
|
||||
if (is_pmd(crste))
|
||||
return large_pmd_to_phys(crste.pmd, gfn);
|
||||
return large_pud_to_phys(crste.pud, gfn);
|
||||
}
|
||||
|
||||
static inline bool cspg_crste(union crste *crstep, union crste old, union crste new)
|
||||
{
|
||||
return cspg(&crstep->val, old.val, new.val);
|
||||
}
|
||||
|
||||
static inline struct page_table *dereference_pmd(union pmd pmd)
|
||||
{
|
||||
return phys_to_virt(crste_origin(pmd));
|
||||
}
|
||||
|
||||
static inline struct segment_table *dereference_pud(union pud pud)
|
||||
{
|
||||
return phys_to_virt(crste_origin(pud));
|
||||
}
|
||||
|
||||
static inline struct region3_table *dereference_p4d(union p4d p4d)
|
||||
{
|
||||
return phys_to_virt(crste_origin(p4d));
|
||||
}
|
||||
|
||||
static inline struct region2_table *dereference_pgd(union pgd pgd)
|
||||
{
|
||||
return phys_to_virt(crste_origin(pgd));
|
||||
}
|
||||
|
||||
static inline struct crst_table *_dereference_crste(union crste crste)
|
||||
{
|
||||
if (unlikely(is_pmd(crste)))
|
||||
return NULL;
|
||||
return phys_to_virt(crste_origin(crste.pud));
|
||||
}
|
||||
|
||||
#define dereference_crste(x) (_Generic((x), \
|
||||
union pud : _dereference_crste(_CRSTE(x)), \
|
||||
union p4d : _dereference_crste(_CRSTE(x)), \
|
||||
union pgd : _dereference_crste(_CRSTE(x)), \
|
||||
union crste : _dereference_crste(_CRSTE(x))))
|
||||
|
||||
static inline struct crst_table *dereference_asce(union asce asce)
|
||||
{
|
||||
return phys_to_virt(asce.val & _ASCE_ORIGIN);
|
||||
}
|
||||
|
||||
static inline void asce_flush_tlb(union asce asce)
|
||||
{
|
||||
__tlb_flush_idte(asce.val);
|
||||
}
|
||||
|
||||
/*
 * Try to acquire the PCL lock bit in the PGSTE belonging to @ptep.
 * On success, *@res receives the PGSTE value (with the lock bit set) and
 * true is returned; on contention false is returned and *@res is untouched.
 */
static inline bool pgste_get_trylock(union pte *ptep, union pgste *res)
{
	union pgste *pgstep = pgste_of(ptep);
	union pgste old_pgste;

	/* Cheap racy pre-check: skip the atomic op if already locked. */
	if (READ_ONCE(pgstep->val) & PGSTE_PCL_BIT)
		return false;
	/* Atomically set the lock bit; the old value tells us who won. */
	old_pgste.val = __atomic64_or_barrier(PGSTE_PCL_BIT, &pgstep->val);
	if (old_pgste.pcl)
		return false;
	/* Hand the caller the locked view of the PGSTE. */
	old_pgste.pcl = 1;
	*res = old_pgste;
	return true;
}
|
||||
|
||||
static inline union pgste pgste_get_lock(union pte *ptep)
|
||||
{
|
||||
union pgste res;
|
||||
|
||||
while (!pgste_get_trylock(ptep, &res))
|
||||
cpu_relax();
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
 * Store @pgste back and drop the PCL lock bit.  The barrier() keeps the
 * compiler from sinking protected accesses past the unlocking store.
 */
static inline void pgste_set_unlock(union pte *ptep, union pgste pgste)
{
	pgste.pcl = 0;
	barrier();
	WRITE_ONCE(*pgste_of(ptep), pgste);
}
|
||||
|
||||
/*
 * Exchange the pte at @ptep with @new while holding its PGSTE lock; the
 * actual exchange and bookkeeping are done by __dat_ptep_xchg().
 */
static inline void dat_ptep_xchg(union pte *ptep, union pte new, gfn_t gfn, union asce asce,
				 bool has_skeys)
{
	union pgste pgste;

	pgste = pgste_get_lock(ptep);
	pgste = __dat_ptep_xchg(ptep, pgste, new, gfn, asce, has_skeys);
	pgste_set_unlock(ptep, pgste);
}

/* Clear the pte at @ptep (replace it with the empty pte) under the PGSTE lock. */
static inline void dat_ptep_clear(union pte *ptep, gfn_t gfn, union asce asce, bool has_skeys)
{
	dat_ptep_xchg(ptep, _PTE_EMPTY, gfn, asce, has_skeys);
}
|
||||
|
||||
static inline void dat_free_pt(struct page_table *pt)
|
||||
{
|
||||
free_page((unsigned long)pt);
|
||||
}
|
||||
|
||||
static inline void _dat_free_crst(struct crst_table *table)
|
||||
{
|
||||
free_pages((unsigned long)table, CRST_ALLOC_ORDER);
|
||||
}
|
||||
|
||||
#define dat_free_crst(x) _dat_free_crst(_CRSTP(x))
|
||||
|
||||
static inline void kvm_s390_free_mmu_cache(struct kvm_s390_mmu_cache *mc)
|
||||
{
|
||||
if (!mc)
|
||||
return;
|
||||
while (mc->n_pts)
|
||||
dat_free_pt(mc->pts[--mc->n_pts]);
|
||||
while (mc->n_crsts)
|
||||
_dat_free_crst(mc->crsts[--mc->n_crsts]);
|
||||
while (mc->n_rmaps)
|
||||
kfree(mc->rmaps[--mc->n_rmaps]);
|
||||
kfree(mc);
|
||||
}
|
||||
|
||||
DEFINE_FREE(kvm_s390_mmu_cache, struct kvm_s390_mmu_cache *, if (_T) kvm_s390_free_mmu_cache(_T))
|
||||
|
||||
/*
 * Allocate and fill a fresh MMU cache.  The __free() cleanup automatically
 * disposes of the (possibly partially filled) cache on the failure paths;
 * return_ptr() transfers ownership to the caller on success.
 * Returns NULL if the allocation or the initial topup fails.
 */
static inline struct kvm_s390_mmu_cache *kvm_s390_new_mmu_cache(void)
{
	struct kvm_s390_mmu_cache *mc __free(kvm_s390_mmu_cache) = NULL;

	mc = kzalloc(sizeof(*mc), GFP_KERNEL_ACCOUNT);
	if (mc && !kvm_s390_mmu_cache_topup(mc))
		return_ptr(mc);
	return NULL;
}
|
||||
|
||||
static inline bool dat_pmdp_xchg_atomic(union pmd *pmdp, union pmd old, union pmd new,
|
||||
gfn_t gfn, union asce asce)
|
||||
{
|
||||
return dat_crstep_xchg_atomic(_CRSTEP(pmdp), _CRSTE(old), _CRSTE(new), gfn, asce);
|
||||
}
|
||||
|
||||
static inline bool dat_pudp_xchg_atomic(union pud *pudp, union pud old, union pud new,
|
||||
gfn_t gfn, union asce asce)
|
||||
{
|
||||
return dat_crstep_xchg_atomic(_CRSTEP(pudp), _CRSTE(old), _CRSTE(new), gfn, asce);
|
||||
}
|
||||
|
||||
static inline void dat_crstep_clear(union crste *crstep, gfn_t gfn, union asce asce)
|
||||
{
|
||||
union crste newcrste = _CRSTE_EMPTY(crstep->h.tt);
|
||||
|
||||
dat_crstep_xchg(crstep, newcrste, gfn, asce);
|
||||
}
|
||||
|
||||
static inline int get_level(union crste *crstep, union pte *ptep)
|
||||
{
|
||||
return ptep ? TABLE_TYPE_PAGE_TABLE : crstep->h.tt;
|
||||
}
|
||||
|
||||
static inline int dat_delete_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start,
|
||||
unsigned long npages)
|
||||
{
|
||||
return dat_set_slot(mc, asce, start, start + npages, _DAT_TOKEN_PIC, PGM_ADDRESSING);
|
||||
}
|
||||
|
||||
static inline int dat_create_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start,
|
||||
unsigned long npages)
|
||||
{
|
||||
return dat_set_slot(mc, asce, start, start + npages, _DAT_TOKEN_NONE, 0);
|
||||
}
|
||||
|
||||
static inline bool crste_is_ucas(union crste crste)
|
||||
{
|
||||
return is_pmd(crste) && crste.h.i && crste.h.fc0.tl == 1 && crste.h.fc == 0;
|
||||
}
|
||||
|
||||
#endif /* __KVM_S390_DAT_H */
|
||||
|
|
@ -10,13 +10,13 @@
|
|||
|
||||
#include <linux/kvm.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <asm/gmap.h>
|
||||
#include <asm/gmap_helpers.h>
|
||||
#include <asm/virtio-ccw.h>
|
||||
#include "kvm-s390.h"
|
||||
#include "trace.h"
|
||||
#include "trace-s390.h"
|
||||
#include "gaccess.h"
|
||||
#include "gmap.h"
|
||||
|
||||
static void do_discard_gfn_range(struct kvm_vcpu *vcpu, gfn_t gfn_start, gfn_t gfn_end)
|
||||
{
|
||||
|
|
|
|||
148
arch/s390/kvm/faultin.c
Normal file
148
arch/s390/kvm/faultin.c
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* KVM guest fault handling.
|
||||
*
|
||||
* Copyright IBM Corp. 2025
|
||||
* Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
|
||||
*/
|
||||
#include <linux/kvm_types.h>
|
||||
#include <linux/kvm_host.h>
|
||||
|
||||
#include "gmap.h"
|
||||
#include "trace.h"
|
||||
#include "faultin.h"
|
||||
|
||||
bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu);
|
||||
|
||||
/*
|
||||
* kvm_s390_faultin_gfn() - handle a dat fault.
|
||||
* @vcpu: The vCPU whose gmap is to be fixed up, or NULL if operating on the VM.
|
||||
* @kvm: The VM whose gmap is to be fixed up, or NULL if operating on a vCPU.
|
||||
* @f: The guest fault that needs to be resolved.
|
||||
*
|
||||
* Return:
|
||||
* * 0 on success
|
||||
* * < 0 in case of error
|
||||
* * > 0 in case of guest exceptions
|
||||
*
|
||||
* Context:
|
||||
* * The mm lock must not be held before calling
|
||||
* * kvm->srcu must be held
|
||||
* * may sleep
|
||||
*/
|
||||
int kvm_s390_faultin_gfn(struct kvm_vcpu *vcpu, struct kvm *kvm, struct guest_fault *f)
{
	struct kvm_s390_mmu_cache *local_mc __free(kvm_s390_mmu_cache) = NULL;
	struct kvm_s390_mmu_cache *mc = NULL;
	struct kvm_memory_slot *slot;
	unsigned long inv_seq;
	int foll, rc = 0;

	foll = f->write_attempt ? FOLL_WRITE : 0;
	foll |= f->attempt_pfault ? FOLL_NOWAIT : 0;

	/* When called for a vCPU, use the vCPU's pre-filled MMU cache. */
	if (vcpu) {
		kvm = vcpu->kvm;
		mc = vcpu->arch.mc;
	}

	lockdep_assert_held(&kvm->srcu);

	/* Fast path: the fault may be fixable without faulting in a new page. */
	scoped_guard(read_lock, &kvm->mmu_lock) {
		if (gmap_try_fixup_minor(kvm->arch.gmap, f) == 0)
			return 0;
	}

	while (1) {
		f->valid = false;
		/* Snapshot the invalidation sequence before faulting in the pfn. */
		inv_seq = kvm->mmu_invalidate_seq;
		/* Pairs with the smp_wmb() in kvm_mmu_invalidate_end(). */
		smp_rmb();

		if (vcpu)
			slot = kvm_vcpu_gfn_to_memslot(vcpu, f->gfn);
		else
			slot = gfn_to_memslot(kvm, f->gfn);
		f->pfn = __kvm_faultin_pfn(slot, f->gfn, foll, &f->writable, &f->page);

		/* Needs I/O, try to setup async pfault (only possible with FOLL_NOWAIT). */
		if (f->pfn == KVM_PFN_ERR_NEEDS_IO) {
			if (unlikely(!f->attempt_pfault))
				return -EAGAIN;
			if (unlikely(!vcpu))
				return -EINVAL;
			trace_kvm_s390_major_guest_pfault(vcpu);
			if (kvm_arch_setup_async_pf(vcpu))
				return 0;
			vcpu->stat.pfault_sync++;
			/* Could not setup async pfault, try again synchronously. */
			foll &= ~FOLL_NOWAIT;
			f->pfn = __kvm_faultin_pfn(slot, f->gfn, foll, &f->writable, &f->page);
		}

		/* Access outside memory, addressing exception. */
		if (is_noslot_pfn(f->pfn))
			return PGM_ADDRESSING;
		/* Signal pending: try again. */
		if (f->pfn == KVM_PFN_ERR_SIGPENDING)
			return -EAGAIN;
		/* Check if it's read-only memory; don't try to actually handle that case. */
		if (f->pfn == KVM_PFN_ERR_RO_FAULT)
			return -EOPNOTSUPP;
		/* Any other error. */
		if (is_error_pfn(f->pfn))
			return -EFAULT;

		/* VM-scope calls have no per-vCPU cache: allocate one on demand. */
		if (!mc) {
			local_mc = kvm_s390_new_mmu_cache();
			if (!local_mc)
				return -ENOMEM;
			mc = local_mc;
		}

		/* Loop, will automatically release the faulted page. */
		if (mmu_invalidate_retry_gfn_unsafe(kvm, inv_seq, f->gfn)) {
			kvm_release_faultin_page(kvm, f->page, true, false);
			continue;
		}

		/* Re-check the sequence under the lock before linking the page. */
		scoped_guard(read_lock, &kvm->mmu_lock) {
			if (!mmu_invalidate_retry_gfn(kvm, inv_seq, f->gfn)) {
				f->valid = true;
				rc = gmap_link(mc, kvm->arch.gmap, f);
				kvm_release_faultin_page(kvm, f->page, !!rc, f->write_attempt);
				f->page = NULL;
			}
		}
		/* f->page was set to NULL above if it was consumed under the lock. */
		kvm_release_faultin_page(kvm, f->page, true, false);

		if (rc == -ENOMEM) {
			/* gmap_link() ran out of cached pages: refill and retry. */
			rc = kvm_s390_mmu_cache_topup(mc);
			if (rc)
				return rc;
		} else if (rc != -EAGAIN) {
			return rc;
		}
	}
}
|
||||
|
||||
/*
 * kvm_s390_get_guest_page() - fault in a single guest page, without linking it.
 * @kvm: the VM.
 * @f: the guest fault structure to fill in.
 * @gfn: the guest frame number to fault in.
 * @w: whether write access is requested.
 *
 * On success @f is marked valid and holds the pfn and page reference; this
 * function does not release the page, so the caller has to (presumably via
 * kvm_release_faultin_page() — confirm against callers).
 *
 * Return: 0 on success, PGM_ADDRESSING if @gfn is outside every memslot,
 * -EINTR if a signal is pending, -EAGAIN if I/O would be needed, -EFAULT on
 * any other faultin error.
 */
int kvm_s390_get_guest_page(struct kvm *kvm, struct guest_fault *f, gfn_t gfn, bool w)
{
	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
	int foll = w ? FOLL_WRITE : 0;

	f->write_attempt = w;
	f->gfn = gfn;
	f->pfn = __kvm_faultin_pfn(slot, gfn, foll, &f->writable, &f->page);
	if (is_noslot_pfn(f->pfn))
		return PGM_ADDRESSING;
	if (is_sigpending_pfn(f->pfn))
		return -EINTR;
	if (f->pfn == KVM_PFN_ERR_NEEDS_IO)
		return -EAGAIN;
	if (is_error_pfn(f->pfn))
		return -EFAULT;

	f->valid = true;
	return 0;
}
|
||||
92
arch/s390/kvm/faultin.h
Normal file
92
arch/s390/kvm/faultin.h
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* KVM guest fault handling.
|
||||
*
|
||||
* Copyright IBM Corp. 2025
|
||||
* Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
|
||||
*/
|
||||
|
||||
#ifndef __KVM_S390_FAULTIN_H
|
||||
#define __KVM_S390_FAULTIN_H
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
|
||||
#include "dat.h"
|
||||
|
||||
int kvm_s390_faultin_gfn(struct kvm_vcpu *vcpu, struct kvm *kvm, struct guest_fault *f);
|
||||
int kvm_s390_get_guest_page(struct kvm *kvm, struct guest_fault *f, gfn_t gfn, bool w);
|
||||
|
||||
static inline int kvm_s390_faultin_gfn_simple(struct kvm_vcpu *vcpu, struct kvm *kvm,
|
||||
gfn_t gfn, bool wr)
|
||||
{
|
||||
struct guest_fault f = { .gfn = gfn, .write_attempt = wr, };
|
||||
|
||||
return kvm_s390_faultin_gfn(vcpu, kvm, &f);
|
||||
}
|
||||
|
||||
/*
 * Fault in (read-only) the guest page containing @gaddr and read the
 * unsigned long stored at that guest physical address into *@val.
 * Returns 0 on success, otherwise the error from kvm_s390_get_guest_page().
 */
static inline int kvm_s390_get_guest_page_and_read_gpa(struct kvm *kvm, struct guest_fault *f,
						       gpa_t gaddr, unsigned long *val)
{
	int rc;

	rc = kvm_s390_get_guest_page(kvm, f, gpa_to_gfn(gaddr), false);
	if (rc)
		return rc;

	/*
	 * Read through the kernel identity mapping of the pfn.
	 * NOTE(review): assumes the faulted-in pfn is always directly
	 * mapped (no highmem-like case) and @gaddr is naturally aligned
	 * enough for an unsigned long read — confirm at the call sites.
	 */
	*val = *(unsigned long *)phys_to_virt(pfn_to_phys(f->pfn) | offset_in_page(gaddr));

	return 0;
}
|
||||
|
||||
/*
 * Release the pages of the first @n entries of @guest_faults and clear
 * their page pointers.  @ignore is forwarded to kvm_release_faultin_page().
 */
static inline void kvm_s390_release_multiple(struct kvm *kvm, struct guest_fault *guest_faults,
					     int n, bool ignore)
{
	struct guest_fault *f, *end = guest_faults + n;

	for (f = guest_faults; f < end; f++) {
		kvm_release_faultin_page(kvm, f->page, ignore, f->write_attempt);
		f->page = NULL;
	}
}
|
||||
|
||||
/*
 * Check whether any valid entry in @guest_faults was invalidated since
 * sequence @seq and therefore needs to be retried.  With @unsafe the
 * lock-free variant of the retry check is used.
 */
static inline bool kvm_s390_multiple_faults_need_retry(struct kvm *kvm, unsigned long seq,
						       struct guest_fault *guest_faults, int n,
						       bool unsafe)
{
	bool retry;
	int i;

	for (i = 0; i < n; i++) {
		if (!guest_faults[i].valid)
			continue;
		if (unsafe)
			retry = mmu_invalidate_retry_gfn_unsafe(kvm, seq, guest_faults[i].gfn);
		else
			retry = mmu_invalidate_retry_gfn(kvm, seq, guest_faults[i].gfn);
		if (retry)
			return true;
	}
	return false;
}
|
||||
|
||||
static inline int kvm_s390_get_guest_pages(struct kvm *kvm, struct guest_fault *guest_faults,
|
||||
gfn_t start, int n_pages, bool write_attempt)
|
||||
{
|
||||
int i, rc;
|
||||
|
||||
for (i = 0; i < n_pages; i++) {
|
||||
rc = kvm_s390_get_guest_page(kvm, guest_faults + i, start + i, write_attempt);
|
||||
if (rc)
|
||||
break;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
#define kvm_s390_release_faultin_array(kvm, array, ignore) \
|
||||
kvm_s390_release_multiple(kvm, array, ARRAY_SIZE(array), ignore)
|
||||
|
||||
#define kvm_s390_array_needs_retry_unsafe(kvm, seq, array) \
|
||||
kvm_s390_multiple_faults_need_retry(kvm, seq, array, ARRAY_SIZE(array), true)
|
||||
|
||||
#define kvm_s390_array_needs_retry_safe(kvm, seq, array) \
|
||||
kvm_s390_multiple_faults_need_retry(kvm, seq, array, ARRAY_SIZE(array), false)
|
||||
|
||||
#endif /* __KVM_S390_FAULTIN_H */
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -206,8 +206,8 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
|
|||
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
|
||||
void *data, unsigned long len, enum gacc_mode mode);
|
||||
|
||||
int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, __uint128_t *old,
|
||||
__uint128_t new, u8 access_key, bool *success);
|
||||
int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, union kvm_s390_quad *old,
|
||||
union kvm_s390_quad new, u8 access_key, bool *success);
|
||||
|
||||
/**
|
||||
* write_guest_with_key - copy data from kernel space to guest space
|
||||
|
|
@ -450,11 +450,17 @@ void ipte_unlock(struct kvm *kvm);
|
|||
int ipte_lock_held(struct kvm *kvm);
|
||||
int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
|
||||
|
||||
/* MVPG PEI indication bits */
|
||||
#define PEI_DAT_PROT 2
|
||||
#define PEI_NOT_PTE 4
|
||||
union mvpg_pei {
|
||||
unsigned long val;
|
||||
struct {
|
||||
unsigned long addr : 61;
|
||||
unsigned long not_pte : 1;
|
||||
unsigned long dat_prot: 1;
|
||||
unsigned long real : 1;
|
||||
};
|
||||
};
|
||||
|
||||
int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
|
||||
unsigned long saddr, unsigned long *datptr);
|
||||
int gaccess_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, gpa_t saddr,
|
||||
union mvpg_pei *datptr, bool wr);
|
||||
|
||||
#endif /* __KVM_S390_GACCESS_H */
|
||||
|
|
|
|||
|
|
@ -1,141 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Guest memory management for KVM/s390 nested VMs.
|
||||
*
|
||||
* Copyright IBM Corp. 2008, 2020, 2024
|
||||
*
|
||||
* Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
|
||||
* Martin Schwidefsky <schwidefsky@de.ibm.com>
|
||||
* David Hildenbrand <david@redhat.com>
|
||||
* Janosch Frank <frankja@linux.vnet.ibm.com>
|
||||
*/
|
||||
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/kvm.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/pgtable.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/mman.h>
|
||||
|
||||
#include <asm/lowcore.h>
|
||||
#include <asm/gmap.h>
|
||||
#include <asm/uv.h>
|
||||
|
||||
#include "kvm-s390.h"
|
||||
|
||||
/**
|
||||
* gmap_find_shadow - find a specific asce in the list of shadow tables
|
||||
* @parent: pointer to the parent gmap
|
||||
* @asce: ASCE for which the shadow table is created
|
||||
* @edat_level: edat level to be used for the shadow translation
|
||||
*
|
||||
* Returns the pointer to a gmap if a shadow table with the given asce is
|
||||
* already available, ERR_PTR(-EAGAIN) if another one is just being created,
|
||||
* otherwise NULL
|
||||
*
|
||||
* Context: Called with parent->shadow_lock held
|
||||
*/
|
||||
static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, int edat_level)
|
||||
{
|
||||
struct gmap *sg;
|
||||
|
||||
lockdep_assert_held(&parent->shadow_lock);
|
||||
list_for_each_entry(sg, &parent->children, list) {
|
||||
if (!gmap_shadow_valid(sg, asce, edat_level))
|
||||
continue;
|
||||
if (!sg->initialized)
|
||||
return ERR_PTR(-EAGAIN);
|
||||
refcount_inc(&sg->ref_count);
|
||||
return sg;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* gmap_shadow - create/find a shadow guest address space
|
||||
* @parent: pointer to the parent gmap
|
||||
* @asce: ASCE for which the shadow table is created
|
||||
* @edat_level: edat level to be used for the shadow translation
|
||||
*
|
||||
* The pages of the top level page table referred by the asce parameter
|
||||
* will be set to read-only and marked in the PGSTEs of the kvm process.
|
||||
* The shadow table will be removed automatically on any change to the
|
||||
* PTE mapping for the source table.
|
||||
*
|
||||
* Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
|
||||
* ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
|
||||
* parent gmap table could not be protected.
|
||||
*/
|
||||
struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level)
{
	struct gmap *sg, *new;
	unsigned long limit;
	int rc;

	/* Shadowing with 1M hugepages or shadow-of-shadow is a bug. */
	if (KVM_BUG_ON(parent->mm->context.allow_gmap_hpage_1m, (struct kvm *)parent->private) ||
	    KVM_BUG_ON(gmap_is_shadow(parent), (struct kvm *)parent->private))
		return ERR_PTR(-EFAULT);
	/* Fast path: reuse an already existing matching shadow. */
	spin_lock(&parent->shadow_lock);
	sg = gmap_find_shadow(parent, asce, edat_level);
	spin_unlock(&parent->shadow_lock);
	if (sg)
		return sg;
	/* Create a new shadow gmap */
	limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
	if (asce & _ASCE_REAL_SPACE)
		limit = -1UL;
	new = gmap_alloc(limit);
	if (!new)
		return ERR_PTR(-ENOMEM);
	new->mm = parent->mm;
	new->parent = gmap_get(parent);
	new->private = parent->private;
	new->orig_asce = asce;
	new->edat_level = edat_level;
	/* Not initialized yet: concurrent lookups will return -EAGAIN. */
	new->initialized = false;
	spin_lock(&parent->shadow_lock);
	/* Recheck if another CPU created the same shadow */
	sg = gmap_find_shadow(parent, asce, edat_level);
	if (sg) {
		spin_unlock(&parent->shadow_lock);
		gmap_free(new);
		return sg;
	}
	if (asce & _ASCE_REAL_SPACE) {
		/* only allow one real-space gmap shadow */
		list_for_each_entry(sg, &parent->children, list) {
			if (sg->orig_asce & _ASCE_REAL_SPACE) {
				spin_lock(&sg->guest_table_lock);
				gmap_unshadow(sg);
				spin_unlock(&sg->guest_table_lock);
				list_del(&sg->list);
				gmap_put(sg);
				break;
			}
		}
	}
	/* One reference for the children list, one for the caller. */
	refcount_set(&new->ref_count, 2);
	list_add(&new->list, &parent->children);
	if (asce & _ASCE_REAL_SPACE) {
		/* nothing to protect, return right away */
		new->initialized = true;
		spin_unlock(&parent->shadow_lock);
		return new;
	}
	spin_unlock(&parent->shadow_lock);
	/* protect after insertion, so it will get properly invalidated */
	mmap_read_lock(parent->mm);
	rc = __kvm_s390_mprotect_many(parent, asce & _ASCE_ORIGIN,
				      ((asce & _ASCE_TABLE_LENGTH) + 1),
				      PROT_READ, GMAP_NOTIFY_SHADOW);
	mmap_read_unlock(parent->mm);
	spin_lock(&parent->shadow_lock);
	new->initialized = true;
	if (rc) {
		/* Protection failed: unlink and destroy the half-built shadow. */
		list_del(&new->list);
		gmap_free(new);
		new = ERR_PTR(rc);
	}
	spin_unlock(&parent->shadow_lock);
	return new;
}
|
||||
1244
arch/s390/kvm/gmap.c
Normal file
1244
arch/s390/kvm/gmap.c
Normal file
File diff suppressed because it is too large
Load diff
244
arch/s390/kvm/gmap.h
Normal file
244
arch/s390/kvm/gmap.h
Normal file
|
|
@ -0,0 +1,244 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* KVM guest address space mapping code
|
||||
*
|
||||
* Copyright IBM Corp. 2007, 2016, 2025
|
||||
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
|
||||
* Claudio Imbrenda <imbrenda@linux.ibm.com>
|
||||
*/
|
||||
|
||||
#ifndef ARCH_KVM_S390_GMAP_H
|
||||
#define ARCH_KVM_S390_GMAP_H
|
||||
|
||||
#include "dat.h"
|
||||
|
||||
/**
|
||||
* enum gmap_flags - Flags of a gmap.
|
||||
*
|
||||
* @GMAP_FLAG_SHADOW: The gmap is a vsie shadow gmap.
|
||||
* @GMAP_FLAG_OWNS_PAGETABLES: The gmap owns all dat levels; normally 1, is 0
|
||||
* only for ucontrol per-cpu gmaps, since they
|
||||
* share the page tables with the main gmap.
|
||||
* @GMAP_FLAG_IS_UCONTROL: The gmap is ucontrol (main gmap or per-cpu gmap).
|
||||
* @GMAP_FLAG_ALLOW_HPAGE_1M: 1M hugepages are allowed for this gmap,
|
||||
* independently of the page size used by userspace.
|
||||
* @GMAP_FLAG_ALLOW_HPAGE_2G: 2G hugepages are allowed for this gmap,
|
||||
* independently of the page size used by userspace.
|
||||
* @GMAP_FLAG_PFAULT_ENABLED: Pfault is enabled for the gmap.
|
||||
* @GMAP_FLAG_USES_SKEYS: If the guest uses storage keys.
|
||||
* @GMAP_FLAG_USES_CMM: Whether the guest uses CMMA.
|
||||
* @GMAP_FLAG_EXPORT_ON_UNMAP: Whether to export guest pages when unmapping.
|
||||
*/
|
||||
enum gmap_flags {
|
||||
GMAP_FLAG_SHADOW = 0,
|
||||
GMAP_FLAG_OWNS_PAGETABLES,
|
||||
GMAP_FLAG_IS_UCONTROL,
|
||||
GMAP_FLAG_ALLOW_HPAGE_1M,
|
||||
GMAP_FLAG_ALLOW_HPAGE_2G,
|
||||
GMAP_FLAG_PFAULT_ENABLED,
|
||||
GMAP_FLAG_USES_SKEYS,
|
||||
GMAP_FLAG_USES_CMM,
|
||||
GMAP_FLAG_EXPORT_ON_UNMAP,
|
||||
};
|
||||
|
||||
/**
|
||||
* struct gmap_struct - Guest address space.
|
||||
*
|
||||
* @flags: GMAP_FLAG_* flags.
|
||||
* @edat_level: The edat level of this shadow gmap.
|
||||
* @kvm: The vm.
|
||||
* @asce: The ASCE used by this gmap.
|
||||
* @list: List head used in children gmaps for the children gmap list.
|
||||
* @children_lock: Protects children and scb_users.
|
||||
* @children: List of child gmaps of this gmap.
|
||||
* @scb_users: List of vsie_scb that use this shadow gmap.
|
||||
* @parent: Parent gmap of a child gmap.
|
||||
* @guest_asce: Original ASCE of this shadow gmap.
|
||||
* @host_to_rmap_lock: Protects host_to_rmap.
|
||||
* @host_to_rmap: Radix tree mapping host addresses to guest addresses.
|
||||
*/
|
||||
struct gmap {
|
||||
unsigned long flags;
|
||||
unsigned char edat_level;
|
||||
struct kvm *kvm;
|
||||
union asce asce;
|
||||
struct list_head list;
|
||||
spinlock_t children_lock; /* Protects: children, scb_users */
|
||||
struct list_head children;
|
||||
struct list_head scb_users;
|
||||
struct gmap *parent;
|
||||
union asce guest_asce;
|
||||
spinlock_t host_to_rmap_lock; /* Protects host_to_rmap */
|
||||
struct radix_tree_root host_to_rmap;
|
||||
refcount_t refcount;
|
||||
};
|
||||
|
||||
struct gmap_cache {
|
||||
struct list_head list;
|
||||
struct gmap *gmap;
|
||||
};
|
||||
|
||||
#define gmap_for_each_rmap_safe(pos, n, head) \
|
||||
for (pos = (head); n = pos ? pos->next : NULL, pos; pos = n)
|
||||
|
||||
int s390_replace_asce(struct gmap *gmap);
|
||||
bool _gmap_unmap_prefix(struct gmap *gmap, gfn_t gfn, gfn_t end, bool hint);
|
||||
bool gmap_age_gfn(struct gmap *gmap, gfn_t start, gfn_t end);
|
||||
bool gmap_unmap_gfn_range(struct gmap *gmap, struct kvm_memory_slot *slot, gfn_t start, gfn_t end);
|
||||
int gmap_try_fixup_minor(struct gmap *gmap, struct guest_fault *fault);
|
||||
struct gmap *gmap_new(struct kvm *kvm, gfn_t limit);
|
||||
struct gmap *gmap_new_child(struct gmap *parent, gfn_t limit);
|
||||
void gmap_remove_child(struct gmap *child);
|
||||
void gmap_dispose(struct gmap *gmap);
|
||||
int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fault *fault);
|
||||
void gmap_sync_dirty_log(struct gmap *gmap, gfn_t start, gfn_t end);
|
||||
int gmap_set_limit(struct gmap *gmap, gfn_t limit);
|
||||
int gmap_ucas_translate(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, gpa_t *gaddr);
|
||||
int gmap_ucas_map(struct gmap *gmap, gfn_t p_gfn, gfn_t c_gfn, unsigned long count);
|
||||
void gmap_ucas_unmap(struct gmap *gmap, gfn_t c_gfn, unsigned long count);
|
||||
int gmap_enable_skeys(struct gmap *gmap);
|
||||
int gmap_pv_destroy_range(struct gmap *gmap, gfn_t start, gfn_t end, bool interruptible);
|
||||
int gmap_insert_rmap(struct gmap *sg, gfn_t p_gfn, gfn_t r_gfn, int level);
|
||||
int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gfn, gfn_t r_gfn,
|
||||
kvm_pfn_t pfn, int level, bool wr);
|
||||
void gmap_set_cmma_all_dirty(struct gmap *gmap);
|
||||
void _gmap_handle_vsie_unshadow_event(struct gmap *parent, gfn_t gfn);
|
||||
struct gmap *gmap_create_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *gmap,
|
||||
union asce asce, int edat_level);
|
||||
void gmap_split_huge_pages(struct gmap *gmap);
|
||||
|
||||
static inline bool uses_skeys(struct gmap *gmap)
|
||||
{
|
||||
return test_bit(GMAP_FLAG_USES_SKEYS, &gmap->flags);
|
||||
}
|
||||
|
||||
static inline bool uses_cmm(struct gmap *gmap)
|
||||
{
|
||||
return test_bit(GMAP_FLAG_USES_CMM, &gmap->flags);
|
||||
}
|
||||
|
||||
static inline bool pfault_enabled(struct gmap *gmap)
|
||||
{
|
||||
return test_bit(GMAP_FLAG_PFAULT_ENABLED, &gmap->flags);
|
||||
}
|
||||
|
||||
static inline bool is_ucontrol(struct gmap *gmap)
|
||||
{
|
||||
return test_bit(GMAP_FLAG_IS_UCONTROL, &gmap->flags);
|
||||
}
|
||||
|
||||
static inline bool is_shadow(struct gmap *gmap)
|
||||
{
|
||||
return test_bit(GMAP_FLAG_SHADOW, &gmap->flags);
|
||||
}
|
||||
|
||||
static inline bool owns_page_tables(struct gmap *gmap)
|
||||
{
|
||||
return test_bit(GMAP_FLAG_OWNS_PAGETABLES, &gmap->flags);
|
||||
}
|
||||
|
||||
static inline struct gmap *gmap_put(struct gmap *gmap)
|
||||
{
|
||||
if (refcount_dec_and_test(&gmap->refcount))
|
||||
gmap_dispose(gmap);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void gmap_get(struct gmap *gmap)
|
||||
{
|
||||
WARN_ON_ONCE(unlikely(!refcount_inc_not_zero(&gmap->refcount)));
|
||||
}
|
||||
|
||||
static inline void gmap_handle_vsie_unshadow_event(struct gmap *parent, gfn_t gfn)
|
||||
{
|
||||
scoped_guard(spinlock, &parent->children_lock)
|
||||
_gmap_handle_vsie_unshadow_event(parent, gfn);
|
||||
}
|
||||
|
||||
static inline bool gmap_mkold_prefix(struct gmap *gmap, gfn_t gfn, gfn_t end)
|
||||
{
|
||||
return _gmap_unmap_prefix(gmap, gfn, end, true);
|
||||
}
|
||||
|
||||
static inline bool gmap_unmap_prefix(struct gmap *gmap, gfn_t gfn, gfn_t end)
|
||||
{
|
||||
return _gmap_unmap_prefix(gmap, gfn, end, false);
|
||||
}
|
||||
|
||||
static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, union pte newpte,
|
||||
union pgste pgste, gfn_t gfn, bool needs_lock)
|
||||
{
|
||||
lockdep_assert_held(&gmap->kvm->mmu_lock);
|
||||
if (!needs_lock)
|
||||
lockdep_assert_held(&gmap->children_lock);
|
||||
else
|
||||
lockdep_assert_not_held(&gmap->children_lock);
|
||||
|
||||
if (pgste.prefix_notif && (newpte.h.p || newpte.h.i)) {
|
||||
pgste.prefix_notif = 0;
|
||||
gmap_unmap_prefix(gmap, gfn, gfn + 1);
|
||||
}
|
||||
if (pgste.vsie_notif && (ptep->h.p != newpte.h.p || newpte.h.i)) {
|
||||
pgste.vsie_notif = 0;
|
||||
if (needs_lock)
|
||||
gmap_handle_vsie_unshadow_event(gmap, gfn);
|
||||
else
|
||||
_gmap_handle_vsie_unshadow_event(gmap, gfn);
|
||||
}
|
||||
return __dat_ptep_xchg(ptep, pgste, newpte, gfn, gmap->asce, uses_skeys(gmap));
|
||||
}
|
||||
|
||||
static inline union pgste gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, union pte newpte,
|
||||
union pgste pgste, gfn_t gfn)
|
||||
{
|
||||
return _gmap_ptep_xchg(gmap, ptep, newpte, pgste, gfn, true);
|
||||
}
|
||||
|
||||
static inline void _gmap_crstep_xchg(struct gmap *gmap, union crste *crstep, union crste ne,
|
||||
gfn_t gfn, bool needs_lock)
|
||||
{
|
||||
unsigned long align = 8 + (is_pmd(*crstep) ? 0 : 11);
|
||||
|
||||
lockdep_assert_held(&gmap->kvm->mmu_lock);
|
||||
if (!needs_lock)
|
||||
lockdep_assert_held(&gmap->children_lock);
|
||||
|
||||
gfn = ALIGN_DOWN(gfn, align);
|
||||
if (crste_prefix(*crstep) && (ne.h.p || ne.h.i || !crste_prefix(ne))) {
|
||||
ne.s.fc1.prefix_notif = 0;
|
||||
gmap_unmap_prefix(gmap, gfn, gfn + align);
|
||||
}
|
||||
if (crste_leaf(*crstep) && crstep->s.fc1.vsie_notif &&
|
||||
(ne.h.p || ne.h.i || !ne.s.fc1.vsie_notif)) {
|
||||
ne.s.fc1.vsie_notif = 0;
|
||||
if (needs_lock)
|
||||
gmap_handle_vsie_unshadow_event(gmap, gfn);
|
||||
else
|
||||
_gmap_handle_vsie_unshadow_event(gmap, gfn);
|
||||
}
|
||||
dat_crstep_xchg(crstep, ne, gfn, gmap->asce);
|
||||
}
|
||||
|
||||
static inline void gmap_crstep_xchg(struct gmap *gmap, union crste *crstep, union crste ne,
|
||||
gfn_t gfn)
|
||||
{
|
||||
return _gmap_crstep_xchg(gmap, crstep, ne, gfn, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* gmap_is_shadow_valid() - check if a shadow guest address space matches the
|
||||
* given properties and is still valid.
|
||||
* @sg: Pointer to the shadow guest address space structure.
|
||||
* @asce: ASCE for which the shadow table is requested.
|
||||
* @edat_level: Edat level to be used for the shadow translation.
|
||||
*
|
||||
* Return: true if the gmap shadow is still valid and matches the given
|
||||
* properties and the caller can continue using it; false otherwise, the
|
||||
* caller has to request a new shadow gmap in this case.
|
||||
*/
|
||||
static inline bool gmap_is_shadow_valid(struct gmap *sg, union asce asce, int edat_level)
|
||||
{
|
||||
return sg->guest_asce.val == asce.val && sg->edat_level == edat_level;
|
||||
}
|
||||
|
||||
#endif /* ARCH_KVM_S390_GMAP_H */
|
||||
|
|
@ -21,6 +21,7 @@
|
|||
#include "gaccess.h"
|
||||
#include "trace.h"
|
||||
#include "trace-s390.h"
|
||||
#include "faultin.h"
|
||||
|
||||
u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
|
|
@ -367,8 +368,11 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
|
|||
reg2, &srcaddr, GACC_FETCH, 0);
|
||||
if (rc)
|
||||
return kvm_s390_inject_prog_cond(vcpu, rc);
|
||||
rc = kvm_s390_handle_dat_fault(vcpu, srcaddr, 0);
|
||||
if (rc != 0)
|
||||
|
||||
do {
|
||||
rc = kvm_s390_faultin_gfn_simple(vcpu, NULL, gpa_to_gfn(srcaddr), false);
|
||||
} while (rc == -EAGAIN);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
/* Ensure that the source is paged-in, no actual access -> no key checking */
|
||||
|
|
@ -376,8 +380,11 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
|
|||
reg1, &dstaddr, GACC_STORE, 0);
|
||||
if (rc)
|
||||
return kvm_s390_inject_prog_cond(vcpu, rc);
|
||||
rc = kvm_s390_handle_dat_fault(vcpu, dstaddr, FOLL_WRITE);
|
||||
if (rc != 0)
|
||||
|
||||
do {
|
||||
rc = kvm_s390_faultin_gfn_simple(vcpu, NULL, gpa_to_gfn(dstaddr), true);
|
||||
} while (rc == -EAGAIN);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
kvm_s390_retry_instr(vcpu);
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@
|
|||
#include <linux/uaccess.h>
|
||||
#include <asm/sclp.h>
|
||||
#include <asm/isc.h>
|
||||
#include <asm/gmap.h>
|
||||
#include <asm/nmi.h>
|
||||
#include <asm/airq.h>
|
||||
#include <asm/tpi.h>
|
||||
|
|
@ -34,6 +33,7 @@
|
|||
#include "gaccess.h"
|
||||
#include "trace-s390.h"
|
||||
#include "pci.h"
|
||||
#include "gmap.h"
|
||||
|
||||
#define PFAULT_INIT 0x0600
|
||||
#define PFAULT_DONE 0x0680
|
||||
|
|
@ -2632,12 +2632,12 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
|
|||
case KVM_DEV_FLIC_APF_ENABLE:
|
||||
if (kvm_is_ucontrol(dev->kvm))
|
||||
return -EINVAL;
|
||||
dev->kvm->arch.gmap->pfault_enabled = 1;
|
||||
set_bit(GMAP_FLAG_PFAULT_ENABLED, &dev->kvm->arch.gmap->flags);
|
||||
break;
|
||||
case KVM_DEV_FLIC_APF_DISABLE_WAIT:
|
||||
if (kvm_is_ucontrol(dev->kvm))
|
||||
return -EINVAL;
|
||||
dev->kvm->arch.gmap->pfault_enabled = 0;
|
||||
clear_bit(GMAP_FLAG_PFAULT_ENABLED, &dev->kvm->arch.gmap->flags);
|
||||
/*
|
||||
* Make sure no async faults are in transition when
|
||||
* clearing the queues. So we don't need to worry
|
||||
|
|
@ -2768,13 +2768,13 @@ static int adapter_indicators_set(struct kvm *kvm,
|
|||
bit = get_ind_bit(adapter_int->ind_addr,
|
||||
adapter_int->ind_offset, adapter->swap);
|
||||
set_bit(bit, map);
|
||||
mark_page_dirty(kvm, adapter_int->ind_addr >> PAGE_SHIFT);
|
||||
mark_page_dirty(kvm, adapter_int->ind_gaddr >> PAGE_SHIFT);
|
||||
set_page_dirty_lock(ind_page);
|
||||
map = page_address(summary_page);
|
||||
bit = get_ind_bit(adapter_int->summary_addr,
|
||||
adapter_int->summary_offset, adapter->swap);
|
||||
summary_set = test_and_set_bit(bit, map);
|
||||
mark_page_dirty(kvm, adapter_int->summary_addr >> PAGE_SHIFT);
|
||||
mark_page_dirty(kvm, adapter_int->summary_gaddr >> PAGE_SHIFT);
|
||||
set_page_dirty_lock(summary_page);
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
|
||||
|
|
@ -2870,7 +2870,9 @@ int kvm_set_routing_entry(struct kvm *kvm,
|
|||
if (kvm_is_error_hva(uaddr_s) || kvm_is_error_hva(uaddr_i))
|
||||
return -EFAULT;
|
||||
e->adapter.summary_addr = uaddr_s;
|
||||
e->adapter.summary_gaddr = ue->u.adapter.summary_addr;
|
||||
e->adapter.ind_addr = uaddr_i;
|
||||
e->adapter.ind_gaddr = ue->u.adapter.ind_addr;
|
||||
e->adapter.summary_offset = ue->u.adapter.summary_offset;
|
||||
e->adapter.ind_offset = ue->u.adapter.ind_offset;
|
||||
e->adapter.adapter_id = ue->u.adapter.adapter_id;
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -19,9 +19,19 @@
|
|||
#include <asm/facility.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/sclp.h>
|
||||
#include "dat.h"
|
||||
#include "gmap.h"
|
||||
|
||||
#define KVM_S390_UCONTROL_MEMSLOT (KVM_USER_MEM_SLOTS + 0)
|
||||
|
||||
union kvm_s390_quad {
|
||||
__uint128_t sixteen;
|
||||
unsigned long eight;
|
||||
unsigned int four;
|
||||
unsigned short two;
|
||||
unsigned char one;
|
||||
};
|
||||
|
||||
static inline void kvm_s390_fpu_store(struct kvm_run *run)
|
||||
{
|
||||
fpu_stfpc(&run->s.regs.fpc);
|
||||
|
|
@ -106,9 +116,7 @@ static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
|
|||
static inline int kvm_is_ucontrol(struct kvm *kvm)
|
||||
{
|
||||
#ifdef CONFIG_KVM_S390_UCONTROL
|
||||
if (kvm->arch.gmap)
|
||||
return 0;
|
||||
return 1;
|
||||
return test_bit(GMAP_FLAG_IS_UCONTROL, &kvm->arch.gmap->flags);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
|
|
@ -432,14 +440,9 @@ int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu);
|
|||
/* implemented in vsie.c */
|
||||
int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu);
|
||||
void kvm_s390_vsie_kick(struct kvm_vcpu *vcpu);
|
||||
void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
|
||||
unsigned long end);
|
||||
void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, gpa_t start, gpa_t end);
|
||||
void kvm_s390_vsie_init(struct kvm *kvm);
|
||||
void kvm_s390_vsie_destroy(struct kvm *kvm);
|
||||
int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level);
|
||||
|
||||
/* implemented in gmap-vsie.c */
|
||||
struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level);
|
||||
|
||||
/* implemented in sigp.c */
|
||||
int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
|
||||
|
|
@ -461,14 +464,10 @@ void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
|
|||
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
|
||||
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
|
||||
int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc);
|
||||
int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags);
|
||||
int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
|
||||
unsigned long bits);
|
||||
|
||||
static inline int kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gpa_t gaddr, unsigned int flags)
|
||||
{
|
||||
return __kvm_s390_handle_dat_fault(vcpu, gpa_to_gfn(gaddr), gaddr, flags);
|
||||
}
|
||||
bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu);
|
||||
|
||||
/* implemented in diag.c */
|
||||
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
|
||||
|
|
|
|||
|
|
@ -21,13 +21,14 @@
|
|||
#include <asm/ebcdic.h>
|
||||
#include <asm/sysinfo.h>
|
||||
#include <asm/page-states.h>
|
||||
#include <asm/gmap.h>
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/sclp.h>
|
||||
#include <asm/ap.h>
|
||||
#include <asm/gmap_helpers.h>
|
||||
#include "gaccess.h"
|
||||
#include "kvm-s390.h"
|
||||
#include "trace.h"
|
||||
#include "gmap.h"
|
||||
|
||||
static int handle_ri(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
|
|
@ -222,7 +223,7 @@ int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
|
|||
if (vcpu->arch.skey_enabled)
|
||||
return 0;
|
||||
|
||||
rc = s390_enable_skey();
|
||||
rc = gmap_enable_skeys(vcpu->arch.gmap);
|
||||
VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
|
@ -255,10 +256,9 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
|
|||
|
||||
static int handle_iske(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long gaddr, vmaddr;
|
||||
unsigned char key;
|
||||
unsigned long gaddr;
|
||||
int reg1, reg2;
|
||||
bool unlocked;
|
||||
union skey key;
|
||||
int rc;
|
||||
|
||||
vcpu->stat.instruction_iske++;
|
||||
|
|
@ -275,37 +275,21 @@ static int handle_iske(struct kvm_vcpu *vcpu)
|
|||
gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
|
||||
gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
|
||||
gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
|
||||
vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
|
||||
if (kvm_is_error_hva(vmaddr))
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
retry:
|
||||
unlocked = false;
|
||||
mmap_read_lock(current->mm);
|
||||
rc = get_guest_storage_key(current->mm, vmaddr, &key);
|
||||
|
||||
if (rc) {
|
||||
rc = fixup_user_fault(current->mm, vmaddr,
|
||||
FAULT_FLAG_WRITE, &unlocked);
|
||||
if (!rc) {
|
||||
mmap_read_unlock(current->mm);
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
mmap_read_unlock(current->mm);
|
||||
if (rc == -EFAULT)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
scoped_guard(read_lock, &vcpu->kvm->mmu_lock)
|
||||
rc = dat_get_storage_key(vcpu->arch.gmap->asce, gpa_to_gfn(gaddr), &key);
|
||||
if (rc > 0)
|
||||
return kvm_s390_inject_program_int(vcpu, rc);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
vcpu->run->s.regs.gprs[reg1] &= ~0xff;
|
||||
vcpu->run->s.regs.gprs[reg1] |= key;
|
||||
vcpu->run->s.regs.gprs[reg1] |= key.skey;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int handle_rrbe(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long vmaddr, gaddr;
|
||||
unsigned long gaddr;
|
||||
int reg1, reg2;
|
||||
bool unlocked;
|
||||
int rc;
|
||||
|
||||
vcpu->stat.instruction_rrbe++;
|
||||
|
|
@ -322,24 +306,10 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
|
|||
gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
|
||||
gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
|
||||
gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
|
||||
vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
|
||||
if (kvm_is_error_hva(vmaddr))
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
retry:
|
||||
unlocked = false;
|
||||
mmap_read_lock(current->mm);
|
||||
rc = reset_guest_reference_bit(current->mm, vmaddr);
|
||||
if (rc < 0) {
|
||||
rc = fixup_user_fault(current->mm, vmaddr,
|
||||
FAULT_FLAG_WRITE, &unlocked);
|
||||
if (!rc) {
|
||||
mmap_read_unlock(current->mm);
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
mmap_read_unlock(current->mm);
|
||||
if (rc == -EFAULT)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
scoped_guard(read_lock, &vcpu->kvm->mmu_lock)
|
||||
rc = dat_reset_reference_bit(vcpu->arch.gmap->asce, gpa_to_gfn(gaddr));
|
||||
if (rc > 0)
|
||||
return kvm_s390_inject_program_int(vcpu, rc);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
kvm_s390_set_psw_cc(vcpu, rc);
|
||||
|
|
@ -354,9 +324,8 @@ static int handle_sske(struct kvm_vcpu *vcpu)
|
|||
{
|
||||
unsigned char m3 = vcpu->arch.sie_block->ipb >> 28;
|
||||
unsigned long start, end;
|
||||
unsigned char key, oldkey;
|
||||
union skey key, oldkey;
|
||||
int reg1, reg2;
|
||||
bool unlocked;
|
||||
int rc;
|
||||
|
||||
vcpu->stat.instruction_sske++;
|
||||
|
|
@ -377,7 +346,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
|
|||
|
||||
kvm_s390_get_regs_rre(vcpu, ®1, ®2);
|
||||
|
||||
key = vcpu->run->s.regs.gprs[reg1] & 0xfe;
|
||||
key.skey = vcpu->run->s.regs.gprs[reg1] & 0xfe;
|
||||
start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
|
||||
start = kvm_s390_logical_to_effective(vcpu, start);
|
||||
if (m3 & SSKE_MB) {
|
||||
|
|
@ -389,27 +358,17 @@ static int handle_sske(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
|
||||
while (start != end) {
|
||||
unsigned long vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
|
||||
unlocked = false;
|
||||
|
||||
if (kvm_is_error_hva(vmaddr))
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
|
||||
mmap_read_lock(current->mm);
|
||||
rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey,
|
||||
m3 & SSKE_NQ, m3 & SSKE_MR,
|
||||
m3 & SSKE_MC);
|
||||
|
||||
if (rc < 0) {
|
||||
rc = fixup_user_fault(current->mm, vmaddr,
|
||||
FAULT_FLAG_WRITE, &unlocked);
|
||||
rc = !rc ? -EAGAIN : rc;
|
||||
scoped_guard(read_lock, &vcpu->kvm->mmu_lock) {
|
||||
rc = dat_cond_set_storage_key(vcpu->arch.mc, vcpu->arch.gmap->asce,
|
||||
gpa_to_gfn(start), key, &oldkey,
|
||||
m3 & SSKE_NQ, m3 & SSKE_MR, m3 & SSKE_MC);
|
||||
}
|
||||
mmap_read_unlock(current->mm);
|
||||
if (rc == -EFAULT)
|
||||
if (rc > 1)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
if (rc == -EAGAIN)
|
||||
if (rc == -ENOMEM) {
|
||||
kvm_s390_mmu_cache_topup(vcpu->arch.mc);
|
||||
continue;
|
||||
}
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
start += PAGE_SIZE;
|
||||
|
|
@ -422,7 +381,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
|
|||
} else {
|
||||
kvm_s390_set_psw_cc(vcpu, rc);
|
||||
vcpu->run->s.regs.gprs[reg1] &= ~0xff00UL;
|
||||
vcpu->run->s.regs.gprs[reg1] |= (u64) oldkey << 8;
|
||||
vcpu->run->s.regs.gprs[reg1] |= (u64)oldkey.skey << 8;
|
||||
}
|
||||
}
|
||||
if (m3 & SSKE_MB) {
|
||||
|
|
@ -1082,7 +1041,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
|
|||
bool mr = false, mc = false, nq;
|
||||
int reg1, reg2;
|
||||
unsigned long start, end;
|
||||
unsigned char key;
|
||||
union skey key;
|
||||
|
||||
vcpu->stat.instruction_pfmf++;
|
||||
|
||||
|
|
@ -1110,7 +1069,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
|
||||
nq = vcpu->run->s.regs.gprs[reg1] & PFMF_NQ;
|
||||
key = vcpu->run->s.regs.gprs[reg1] & PFMF_KEY;
|
||||
key.skey = vcpu->run->s.regs.gprs[reg1] & PFMF_KEY;
|
||||
start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
|
||||
start = kvm_s390_logical_to_effective(vcpu, start);
|
||||
|
||||
|
|
@ -1141,14 +1100,6 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
|
||||
while (start != end) {
|
||||
unsigned long vmaddr;
|
||||
bool unlocked = false;
|
||||
|
||||
/* Translate guest address to host address */
|
||||
vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
|
||||
if (kvm_is_error_hva(vmaddr))
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
|
||||
if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
|
||||
if (kvm_clear_guest(vcpu->kvm, start, PAGE_SIZE))
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
|
|
@ -1159,19 +1110,17 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
|
|||
|
||||
if (rc)
|
||||
return rc;
|
||||
mmap_read_lock(current->mm);
|
||||
rc = cond_set_guest_storage_key(current->mm, vmaddr,
|
||||
key, NULL, nq, mr, mc);
|
||||
if (rc < 0) {
|
||||
rc = fixup_user_fault(current->mm, vmaddr,
|
||||
FAULT_FLAG_WRITE, &unlocked);
|
||||
rc = !rc ? -EAGAIN : rc;
|
||||
scoped_guard(read_lock, &vcpu->kvm->mmu_lock) {
|
||||
rc = dat_cond_set_storage_key(vcpu->arch.mc, vcpu->arch.gmap->asce,
|
||||
gpa_to_gfn(start), key,
|
||||
NULL, nq, mr, mc);
|
||||
}
|
||||
mmap_read_unlock(current->mm);
|
||||
if (rc == -EFAULT)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
if (rc == -EAGAIN)
|
||||
if (rc > 1)
|
||||
return kvm_s390_inject_program_int(vcpu, rc);
|
||||
if (rc == -ENOMEM) {
|
||||
kvm_s390_mmu_cache_topup(vcpu->arch.mc);
|
||||
continue;
|
||||
}
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
}
|
||||
|
|
@ -1195,8 +1144,10 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
|
|||
static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
|
||||
{
|
||||
int r1, r2, nappended, entries;
|
||||
unsigned long gfn, hva, res, pgstev, ptev;
|
||||
union essa_state state;
|
||||
unsigned long *cbrlo;
|
||||
unsigned long gfn;
|
||||
bool dirtied;
|
||||
|
||||
/*
|
||||
* We don't need to set SD.FPF.SK to 1 here, because if we have a
|
||||
|
|
@ -1205,33 +1156,12 @@ static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
|
|||
|
||||
kvm_s390_get_regs_rre(vcpu, &r1, &r2);
|
||||
gfn = vcpu->run->s.regs.gprs[r2] >> PAGE_SHIFT;
|
||||
hva = gfn_to_hva(vcpu->kvm, gfn);
|
||||
entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
|
||||
|
||||
if (kvm_is_error_hva(hva))
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
|
||||
|
||||
nappended = pgste_perform_essa(vcpu->kvm->mm, hva, orc, &ptev, &pgstev);
|
||||
if (nappended < 0) {
|
||||
res = orc ? 0x10 : 0;
|
||||
vcpu->run->s.regs.gprs[r1] = res; /* Exception Indication */
|
||||
nappended = dat_perform_essa(vcpu->arch.gmap->asce, gfn, orc, &state, &dirtied);
|
||||
vcpu->run->s.regs.gprs[r1] = state.val;
|
||||
if (nappended < 0)
|
||||
return 0;
|
||||
}
|
||||
res = (pgstev & _PGSTE_GPS_USAGE_MASK) >> 22;
|
||||
/*
|
||||
* Set the block-content state part of the result. 0 means resident, so
|
||||
* nothing to do if the page is valid. 2 is for preserved pages
|
||||
* (non-present and non-zero), and 3 for zero pages (non-present and
|
||||
* zero).
|
||||
*/
|
||||
if (ptev & _PAGE_INVALID) {
|
||||
res |= 2;
|
||||
if (pgstev & _PGSTE_GPS_ZERO)
|
||||
res |= 1;
|
||||
}
|
||||
if (pgstev & _PGSTE_GPS_NODAT)
|
||||
res |= 0x20;
|
||||
vcpu->run->s.regs.gprs[r1] = res;
|
||||
/*
|
||||
* It is possible that all the normal 511 slots were full, in which case
|
||||
* we will now write in the 512th slot, which is reserved for host use.
|
||||
|
|
@ -1243,17 +1173,34 @@ static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
|
|||
cbrlo[entries] = gfn << PAGE_SHIFT;
|
||||
}
|
||||
|
||||
if (orc) {
|
||||
struct kvm_memory_slot *ms = gfn_to_memslot(vcpu->kvm, gfn);
|
||||
|
||||
/* Increment only if we are really flipping the bit */
|
||||
if (ms && !test_and_set_bit(gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
|
||||
atomic64_inc(&vcpu->kvm->arch.cmma_dirty_pages);
|
||||
}
|
||||
if (dirtied)
|
||||
atomic64_inc(&vcpu->kvm->arch.cmma_dirty_pages);
|
||||
|
||||
return nappended;
|
||||
}
|
||||
|
||||
static void _essa_clear_cbrl(struct kvm_vcpu *vcpu, unsigned long *cbrl, int len)
|
||||
{
|
||||
union crste *crstep;
|
||||
union pgste pgste;
|
||||
union pte *ptep;
|
||||
int i;
|
||||
|
||||
lockdep_assert_held(&vcpu->kvm->mmu_lock);
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
if (dat_entry_walk(NULL, gpa_to_gfn(cbrl[i]), vcpu->arch.gmap->asce,
|
||||
0, TABLE_TYPE_PAGE_TABLE, &crstep, &ptep))
|
||||
continue;
|
||||
if (!ptep || ptep->s.pr)
|
||||
continue;
|
||||
pgste = pgste_get_lock(ptep);
|
||||
if (pgste.usage == PGSTE_GPS_USAGE_UNUSED || pgste.zero)
|
||||
gmap_helper_zap_one_page(vcpu->kvm->mm, cbrl[i]);
|
||||
pgste_set_unlock(ptep, pgste);
|
||||
}
|
||||
}
|
||||
|
||||
static int handle_essa(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
lockdep_assert_held(&vcpu->kvm->srcu);
|
||||
|
|
@ -1261,11 +1208,9 @@ static int handle_essa(struct kvm_vcpu *vcpu)
|
|||
/* entries expected to be 1FF */
|
||||
int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
|
||||
unsigned long *cbrlo;
|
||||
struct gmap *gmap;
|
||||
int i, orc;
|
||||
|
||||
VCPU_EVENT(vcpu, 4, "ESSA: release %d pages", entries);
|
||||
gmap = vcpu->arch.gmap;
|
||||
vcpu->stat.instruction_essa++;
|
||||
if (!vcpu->kvm->arch.use_cmma)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
|
||||
|
|
@ -1289,11 +1234,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
|
|||
* value really needs to be written to; if the value is
|
||||
* already correct, we do nothing and avoid the lock.
|
||||
*/
|
||||
if (vcpu->kvm->mm->context.uses_cmm == 0) {
|
||||
mmap_write_lock(vcpu->kvm->mm);
|
||||
vcpu->kvm->mm->context.uses_cmm = 1;
|
||||
mmap_write_unlock(vcpu->kvm->mm);
|
||||
}
|
||||
set_bit(GMAP_FLAG_USES_CMM, &vcpu->arch.gmap->flags);
|
||||
/*
|
||||
* If we are here, we are supposed to have CMMA enabled in
|
||||
* the SIE block. Enabling CMMA works on a per-CPU basis,
|
||||
|
|
@ -1307,20 +1248,22 @@ static int handle_essa(struct kvm_vcpu *vcpu)
|
|||
/* Retry the ESSA instruction */
|
||||
kvm_s390_retry_instr(vcpu);
|
||||
} else {
|
||||
mmap_read_lock(vcpu->kvm->mm);
|
||||
i = __do_essa(vcpu, orc);
|
||||
mmap_read_unlock(vcpu->kvm->mm);
|
||||
scoped_guard(read_lock, &vcpu->kvm->mmu_lock)
|
||||
i = __do_essa(vcpu, orc);
|
||||
if (i < 0)
|
||||
return i;
|
||||
/* Account for the possible extra cbrl entry */
|
||||
entries += i;
|
||||
}
|
||||
vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
|
||||
/* reset nceo */
|
||||
vcpu->arch.sie_block->cbrlo &= PAGE_MASK;
|
||||
cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
|
||||
mmap_read_lock(gmap->mm);
|
||||
for (i = 0; i < entries; ++i)
|
||||
__gmap_zap(gmap, cbrlo[i]);
|
||||
mmap_read_unlock(gmap->mm);
|
||||
|
||||
mmap_read_lock(vcpu->kvm->mm);
|
||||
scoped_guard(read_lock, &vcpu->kvm->mmu_lock)
|
||||
_essa_clear_cbrl(vcpu, cbrlo, entries);
|
||||
mmap_read_unlock(vcpu->kvm->mm);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -12,13 +12,16 @@
|
|||
#include <linux/minmax.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <asm/gmap.h>
|
||||
#include <asm/uv.h>
|
||||
#include <asm/mman.h>
|
||||
#include <linux/pagewalk.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include "kvm-s390.h"
|
||||
#include "dat.h"
|
||||
#include "gaccess.h"
|
||||
#include "gmap.h"
|
||||
#include "faultin.h"
|
||||
|
||||
bool kvm_s390_pv_is_protected(struct kvm *kvm)
|
||||
{
|
||||
|
|
@ -34,6 +37,85 @@ bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);
|
||||
|
||||
/**
|
||||
* should_export_before_import() - Determine whether an export is needed
|
||||
* before an import-like operation.
|
||||
* @uvcb: The Ultravisor control block of the UVC to be performed.
|
||||
* @mm: The mm of the process.
|
||||
*
|
||||
* Returns whether an export is needed before every import-like operation.
|
||||
* This is needed for shared pages, which don't trigger a secure storage
|
||||
* exception when accessed from a different guest.
|
||||
*
|
||||
* Although considered as one, the Unpin Page UVC is not an actual import,
|
||||
* so it is not affected.
|
||||
*
|
||||
* No export is needed also when there is only one protected VM, because the
|
||||
* page cannot belong to the wrong VM in that case (there is no "other VM"
|
||||
* it can belong to).
|
||||
*
|
||||
* Return: %true if an export is needed before every import, otherwise %false.
|
||||
*/
|
||||
static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
|
||||
{
|
||||
/*
|
||||
* The misc feature indicates, among other things, that importing a
|
||||
* shared page from a different protected VM will automatically also
|
||||
* transfer its ownership.
|
||||
*/
|
||||
if (uv_has_feature(BIT_UV_FEAT_MISC))
|
||||
return false;
|
||||
if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
|
||||
return false;
|
||||
return atomic_read(&mm->context.protected_count) > 1;
|
||||
}
|
||||
|
||||
struct pv_make_secure {
|
||||
void *uvcb;
|
||||
struct folio *folio;
|
||||
int rc;
|
||||
bool needs_export;
|
||||
};
|
||||
|
||||
static int __kvm_s390_pv_make_secure(struct guest_fault *f, struct folio *folio)
|
||||
{
|
||||
struct pv_make_secure *priv = f->priv;
|
||||
int rc;
|
||||
|
||||
if (priv->needs_export)
|
||||
uv_convert_from_secure(folio_to_phys(folio));
|
||||
|
||||
if (folio_test_hugetlb(folio))
|
||||
return -EFAULT;
|
||||
if (folio_test_large(folio))
|
||||
return -E2BIG;
|
||||
|
||||
if (!f->page)
|
||||
folio_get(folio);
|
||||
rc = __make_folio_secure(folio, priv->uvcb);
|
||||
if (!f->page)
|
||||
folio_put(folio);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void _kvm_s390_pv_make_secure(struct guest_fault *f)
|
||||
{
|
||||
struct pv_make_secure *priv = f->priv;
|
||||
struct folio *folio;
|
||||
|
||||
folio = pfn_folio(f->pfn);
|
||||
priv->rc = -EAGAIN;
|
||||
if (folio_trylock(folio)) {
|
||||
priv->rc = __kvm_s390_pv_make_secure(f, folio);
|
||||
if (priv->rc == -E2BIG || priv->rc == -EBUSY) {
|
||||
priv->folio = folio;
|
||||
folio_get(folio);
|
||||
}
|
||||
folio_unlock(folio);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_s390_pv_make_secure() - make one guest page secure
|
||||
* @kvm: the guest
|
||||
|
|
@ -45,14 +127,34 @@ EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);
|
|||
*/
|
||||
int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb)
|
||||
{
|
||||
unsigned long vmaddr;
|
||||
struct pv_make_secure priv = { .uvcb = uvcb };
|
||||
struct guest_fault f = {
|
||||
.write_attempt = true,
|
||||
.gfn = gpa_to_gfn(gaddr),
|
||||
.callback = _kvm_s390_pv_make_secure,
|
||||
.priv = &priv,
|
||||
};
|
||||
int rc;
|
||||
|
||||
lockdep_assert_held(&kvm->srcu);
|
||||
|
||||
vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr));
|
||||
if (kvm_is_error_hva(vmaddr))
|
||||
return -EFAULT;
|
||||
return make_hva_secure(kvm->mm, vmaddr, uvcb);
|
||||
priv.needs_export = should_export_before_import(uvcb, kvm->mm);
|
||||
|
||||
scoped_guard(mutex, &kvm->arch.pv.import_lock) {
|
||||
rc = kvm_s390_faultin_gfn(NULL, kvm, &f);
|
||||
|
||||
if (!rc) {
|
||||
rc = priv.rc;
|
||||
if (priv.folio) {
|
||||
rc = s390_wiggle_split_folio(kvm->mm, priv.folio);
|
||||
if (!rc)
|
||||
rc = -EAGAIN;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (priv.folio)
|
||||
folio_put(priv.folio);
|
||||
return rc;
|
||||
}
|
||||
|
||||
int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr)
|
||||
|
|
@ -299,35 +401,6 @@ done_fast:
|
|||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_s390_destroy_lower_2g - Destroy the first 2GB of protected guest memory.
|
||||
* @kvm: the VM whose memory is to be cleared.
|
||||
*
|
||||
* Destroy the first 2GB of guest memory, to avoid prefix issues after reboot.
|
||||
* The CPUs of the protected VM need to be destroyed beforehand.
|
||||
*/
|
||||
static void kvm_s390_destroy_lower_2g(struct kvm *kvm)
|
||||
{
|
||||
const unsigned long pages_2g = SZ_2G / PAGE_SIZE;
|
||||
struct kvm_memory_slot *slot;
|
||||
unsigned long len;
|
||||
int srcu_idx;
|
||||
|
||||
srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
|
||||
/* Take the memslot containing guest absolute address 0 */
|
||||
slot = gfn_to_memslot(kvm, 0);
|
||||
/* Clear all slots or parts thereof that are below 2GB */
|
||||
while (slot && slot->base_gfn < pages_2g) {
|
||||
len = min_t(u64, slot->npages, pages_2g - slot->base_gfn) * PAGE_SIZE;
|
||||
s390_uv_destroy_range(kvm->mm, slot->userspace_addr, slot->userspace_addr + len);
|
||||
/* Take the next memslot */
|
||||
slot = gfn_to_memslot(kvm, slot->base_gfn + slot->npages);
|
||||
}
|
||||
|
||||
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
||||
}
|
||||
|
||||
static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
|
||||
{
|
||||
struct uv_cb_destroy_fast uvcb = {
|
||||
|
|
@ -342,7 +415,6 @@ static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
|
|||
*rc = uvcb.header.rc;
|
||||
if (rrc)
|
||||
*rrc = uvcb.header.rrc;
|
||||
WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
|
||||
KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
|
||||
uvcb.header.rc, uvcb.header.rrc);
|
||||
WARN_ONCE(cc && uvcb.header.rc != 0x104,
|
||||
|
|
@ -391,7 +463,7 @@ int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
|
|||
return -EINVAL;
|
||||
|
||||
/* Guest with segment type ASCE, refuse to destroy asynchronously */
|
||||
if ((kvm->arch.gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
|
||||
if (kvm->arch.gmap->asce.dt == TABLE_TYPE_SEGMENT)
|
||||
return -EINVAL;
|
||||
|
||||
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
|
||||
|
|
@ -404,8 +476,7 @@ int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
|
|||
priv->stor_var = kvm->arch.pv.stor_var;
|
||||
priv->stor_base = kvm->arch.pv.stor_base;
|
||||
priv->handle = kvm_s390_pv_get_handle(kvm);
|
||||
priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table;
|
||||
WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
|
||||
priv->old_gmap_table = (unsigned long)dereference_asce(kvm->arch.gmap->asce);
|
||||
if (s390_replace_asce(kvm->arch.gmap))
|
||||
res = -ENOMEM;
|
||||
}
|
||||
|
|
@ -415,7 +486,7 @@ int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
|
|||
return res;
|
||||
}
|
||||
|
||||
kvm_s390_destroy_lower_2g(kvm);
|
||||
gmap_pv_destroy_range(kvm->arch.gmap, 0, gpa_to_gfn(SZ_2G), false);
|
||||
kvm_s390_clear_pv_state(kvm);
|
||||
kvm->arch.pv.set_aside = priv;
|
||||
|
||||
|
|
@ -449,7 +520,6 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
|
|||
|
||||
cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
|
||||
UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
|
||||
WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
|
||||
if (!cc) {
|
||||
atomic_dec(&kvm->mm->context.protected_count);
|
||||
kvm_s390_pv_dealloc_vm(kvm);
|
||||
|
|
@ -532,7 +602,7 @@ int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
|
|||
* cleanup has been performed.
|
||||
*/
|
||||
if (need_zap && mmget_not_zero(kvm->mm)) {
|
||||
s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
|
||||
gmap_pv_destroy_range(kvm->arch.gmap, 0, asce_end(kvm->arch.gmap->asce), false);
|
||||
mmput(kvm->mm);
|
||||
}
|
||||
|
||||
|
|
@ -570,7 +640,7 @@ int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
|
|||
return -EINVAL;
|
||||
|
||||
/* When a fatal signal is received, stop immediately */
|
||||
if (s390_uv_destroy_range_interruptible(kvm->mm, 0, TASK_SIZE_MAX))
|
||||
if (gmap_pv_destroy_range(kvm->arch.gmap, 0, asce_end(kvm->arch.gmap->asce), true))
|
||||
goto done;
|
||||
if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc))
|
||||
ret = -EIO;
|
||||
|
|
@ -609,6 +679,7 @@ static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
|
|||
r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
|
||||
if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm))
|
||||
kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy);
|
||||
set_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &kvm->arch.gmap->flags);
|
||||
}
|
||||
|
||||
static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
|
||||
|
|
@ -642,7 +713,7 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
|
|||
/* Inputs */
|
||||
uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
|
||||
uvcb.guest_stor_len = kvm->arch.pv.guest_len;
|
||||
uvcb.guest_asce = kvm->arch.gmap->asce;
|
||||
uvcb.guest_asce = kvm->arch.gmap->asce.val;
|
||||
uvcb.guest_sca = virt_to_phys(kvm->arch.sca);
|
||||
uvcb.conf_base_stor_origin =
|
||||
virt_to_phys((void *)kvm->arch.pv.stor_base);
|
||||
|
|
@ -650,6 +721,9 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
|
|||
uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
|
||||
uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;
|
||||
|
||||
clear_bit(GMAP_FLAG_ALLOW_HPAGE_1M, &kvm->arch.gmap->flags);
|
||||
gmap_split_huge_pages(kvm->arch.gmap);
|
||||
|
||||
cc = uv_call_sched(0, (u64)&uvcb);
|
||||
*rc = uvcb.header.rc;
|
||||
*rrc = uvcb.header.rrc;
|
||||
|
|
@ -669,7 +743,6 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
|
|||
}
|
||||
return -EIO;
|
||||
}
|
||||
kvm->arch.gmap->guest_handle = uvcb.guest_handle;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -704,26 +777,14 @@ static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
|
|||
.tweak[1] = offset,
|
||||
};
|
||||
int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb);
|
||||
unsigned long vmaddr;
|
||||
bool unlocked;
|
||||
|
||||
*rc = uvcb.header.rc;
|
||||
*rrc = uvcb.header.rrc;
|
||||
|
||||
if (ret == -ENXIO) {
|
||||
mmap_read_lock(kvm->mm);
|
||||
vmaddr = gfn_to_hva(kvm, gpa_to_gfn(addr));
|
||||
if (kvm_is_error_hva(vmaddr)) {
|
||||
ret = -EFAULT;
|
||||
} else {
|
||||
ret = fixup_user_fault(kvm->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
|
||||
if (!ret)
|
||||
ret = __gmap_link(kvm->arch.gmap, addr, vmaddr);
|
||||
}
|
||||
mmap_read_unlock(kvm->mm);
|
||||
ret = kvm_s390_faultin_gfn_simple(NULL, kvm, gpa_to_gfn(addr), true);
|
||||
if (!ret)
|
||||
return -EAGAIN;
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (ret && ret != -EAGAIN)
|
||||
|
|
|
|||
|
|
@ -15,7 +15,6 @@
|
|||
#include <linux/io.h>
|
||||
#include <linux/mman.h>
|
||||
|
||||
#include <asm/gmap.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/sclp.h>
|
||||
#include <asm/nmi.h>
|
||||
|
|
@ -23,6 +22,7 @@
|
|||
#include <asm/facility.h>
|
||||
#include "kvm-s390.h"
|
||||
#include "gaccess.h"
|
||||
#include "gmap.h"
|
||||
|
||||
enum vsie_page_flags {
|
||||
VSIE_PAGE_IN_USE = 0,
|
||||
|
|
@ -41,8 +41,11 @@ struct vsie_page {
|
|||
* are reused conditionally, should be accessed via READ_ONCE.
|
||||
*/
|
||||
struct kvm_s390_sie_block *scb_o; /* 0x0218 */
|
||||
/* the shadow gmap in use by the vsie_page */
|
||||
struct gmap *gmap; /* 0x0220 */
|
||||
/*
|
||||
* Flags: must be set/cleared atomically after the vsie page can be
|
||||
* looked up by other CPUs.
|
||||
*/
|
||||
unsigned long flags; /* 0x0220 */
|
||||
/* address of the last reported fault to guest2 */
|
||||
unsigned long fault_addr; /* 0x0228 */
|
||||
/* calculated guest addresses of satellite control blocks */
|
||||
|
|
@ -57,33 +60,14 @@ struct vsie_page {
|
|||
* radix tree.
|
||||
*/
|
||||
gpa_t scb_gpa; /* 0x0258 */
|
||||
/*
|
||||
* Flags: must be set/cleared atomically after the vsie page can be
|
||||
* looked up by other CPUs.
|
||||
*/
|
||||
unsigned long flags; /* 0x0260 */
|
||||
__u8 reserved[0x0700 - 0x0268]; /* 0x0268 */
|
||||
/* the shadow gmap in use by the vsie_page */
|
||||
struct gmap_cache gmap_cache; /* 0x0260 */
|
||||
__u8 reserved[0x0700 - 0x0278]; /* 0x0278 */
|
||||
struct kvm_s390_crypto_cb crycb; /* 0x0700 */
|
||||
__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
|
||||
};
|
||||
|
||||
/**
|
||||
* gmap_shadow_valid() - check if a shadow guest address space matches the
|
||||
* given properties and is still valid
|
||||
* @sg: pointer to the shadow guest address space structure
|
||||
* @asce: ASCE for which the shadow table is requested
|
||||
* @edat_level: edat level to be used for the shadow translation
|
||||
*
|
||||
* Returns 1 if the gmap shadow is still valid and matches the given
|
||||
* properties, the caller can continue using it. Returns 0 otherwise; the
|
||||
* caller has to request a new shadow gmap in this case.
|
||||
*/
|
||||
int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
|
||||
{
|
||||
if (sg->removed)
|
||||
return 0;
|
||||
return sg->orig_asce == asce && sg->edat_level == edat_level;
|
||||
}
|
||||
static_assert(sizeof(struct vsie_page) == PAGE_SIZE);
|
||||
|
||||
/* trigger a validity icpt for the given scb */
|
||||
static int set_validity_icpt(struct kvm_s390_sie_block *scb,
|
||||
|
|
@ -612,26 +596,17 @@ out:
|
|||
return rc;
|
||||
}
|
||||
|
||||
void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
|
||||
unsigned long end)
|
||||
void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, gpa_t start, gpa_t end)
|
||||
{
|
||||
struct kvm *kvm = gmap->private;
|
||||
struct vsie_page *cur;
|
||||
struct vsie_page *cur, *next;
|
||||
unsigned long prefix;
|
||||
int i;
|
||||
|
||||
if (!gmap_is_shadow(gmap))
|
||||
return;
|
||||
KVM_BUG_ON(!test_bit(GMAP_FLAG_SHADOW, &gmap->flags), gmap->kvm);
|
||||
/*
|
||||
* Only new shadow blocks are added to the list during runtime,
|
||||
* therefore we can safely reference them all the time.
|
||||
*/
|
||||
for (i = 0; i < kvm->arch.vsie.page_count; i++) {
|
||||
cur = READ_ONCE(kvm->arch.vsie.pages[i]);
|
||||
if (!cur)
|
||||
continue;
|
||||
if (READ_ONCE(cur->gmap) != gmap)
|
||||
continue;
|
||||
list_for_each_entry_safe(cur, next, &gmap->scb_users, gmap_cache.list) {
|
||||
prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
|
||||
/* with mso/msl, the prefix lies at an offset */
|
||||
prefix += cur->scb_s.mso;
|
||||
|
|
@ -652,7 +627,7 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
|
|||
* - -EAGAIN if the caller can retry immediately
|
||||
* - -ENOMEM if out of memory
|
||||
*/
|
||||
static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, struct gmap *sg)
|
||||
{
|
||||
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
|
||||
u64 prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;
|
||||
|
|
@ -667,10 +642,9 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
|||
/* with mso/msl, the prefix lies at offset *mso* */
|
||||
prefix += scb_s->mso;
|
||||
|
||||
rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
|
||||
rc = gaccess_shadow_fault(vcpu, sg, prefix, NULL, true);
|
||||
if (!rc && (scb_s->ecb & ECB_TE))
|
||||
rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
|
||||
prefix + PAGE_SIZE, NULL);
|
||||
rc = gaccess_shadow_fault(vcpu, sg, prefix + PAGE_SIZE, NULL, true);
|
||||
/*
|
||||
* We don't have to mprotect, we will be called for all unshadows.
|
||||
* SIE will detect if protection applies and trigger a validity.
|
||||
|
|
@ -951,8 +925,9 @@ static int inject_fault(struct kvm_vcpu *vcpu, __u16 code, __u64 vaddr,
|
|||
* - > 0 if control has to be given to guest 2
|
||||
* - < 0 if an error occurred
|
||||
*/
|
||||
static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, struct gmap *sg)
|
||||
{
|
||||
bool wr = kvm_s390_cur_gmap_fault_is_write();
|
||||
int rc;
|
||||
|
||||
if ((current->thread.gmap_int_code & PGM_INT_CODE_MASK) == PGM_PROTECTION)
|
||||
|
|
@ -960,12 +935,10 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
|||
return inject_fault(vcpu, PGM_PROTECTION,
|
||||
current->thread.gmap_teid.addr * PAGE_SIZE, 1);
|
||||
|
||||
rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
|
||||
current->thread.gmap_teid.addr * PAGE_SIZE, NULL);
|
||||
rc = gaccess_shadow_fault(vcpu, sg, current->thread.gmap_teid.addr * PAGE_SIZE, NULL, wr);
|
||||
if (rc > 0) {
|
||||
rc = inject_fault(vcpu, rc,
|
||||
current->thread.gmap_teid.addr * PAGE_SIZE,
|
||||
kvm_s390_cur_gmap_fault_is_write());
|
||||
current->thread.gmap_teid.addr * PAGE_SIZE, wr);
|
||||
if (rc >= 0)
|
||||
vsie_page->fault_addr = current->thread.gmap_teid.addr * PAGE_SIZE;
|
||||
}
|
||||
|
|
@ -978,12 +951,10 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
|||
*
|
||||
* Will ignore any errors. The next SIE fault will do proper fault handling.
|
||||
*/
|
||||
static void handle_last_fault(struct kvm_vcpu *vcpu,
|
||||
struct vsie_page *vsie_page)
|
||||
static void handle_last_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, struct gmap *sg)
|
||||
{
|
||||
if (vsie_page->fault_addr)
|
||||
kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
|
||||
vsie_page->fault_addr, NULL);
|
||||
gaccess_shadow_fault(vcpu, sg, vsie_page->fault_addr, NULL, true);
|
||||
vsie_page->fault_addr = 0;
|
||||
}
|
||||
|
||||
|
|
@ -1065,11 +1036,12 @@ static u64 vsie_get_register(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
|
|||
}
|
||||
}
|
||||
|
||||
static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, struct gmap *sg)
|
||||
{
|
||||
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
|
||||
unsigned long pei_dest, pei_src, src, dest, mask, prefix;
|
||||
unsigned long src, dest, mask, prefix;
|
||||
u64 *pei_block = &vsie_page->scb_o->mcic;
|
||||
union mvpg_pei pei_dest, pei_src;
|
||||
int edat, rc_dest, rc_src;
|
||||
union ctlreg0 cr0;
|
||||
|
||||
|
|
@ -1083,8 +1055,8 @@ static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
|||
src = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 16) & mask;
|
||||
src = _kvm_s390_real_to_abs(prefix, src) + scb_s->mso;
|
||||
|
||||
rc_dest = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, dest, &pei_dest);
|
||||
rc_src = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, src, &pei_src);
|
||||
rc_dest = gaccess_shadow_fault(vcpu, sg, dest, &pei_dest, true);
|
||||
rc_src = gaccess_shadow_fault(vcpu, sg, src, &pei_src, false);
|
||||
/*
|
||||
* Either everything went well, or something non-critical went wrong
|
||||
* e.g. because of a race. In either case, simply retry.
|
||||
|
|
@ -1119,8 +1091,8 @@ static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
|||
rc_src = rc_src != PGM_PAGE_TRANSLATION ? rc_src : 0;
|
||||
}
|
||||
if (!rc_dest && !rc_src) {
|
||||
pei_block[0] = pei_dest;
|
||||
pei_block[1] = pei_src;
|
||||
pei_block[0] = pei_dest.val;
|
||||
pei_block[1] = pei_src.val;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
@ -1144,7 +1116,7 @@ static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
|||
* - > 0 if control has to be given to guest 2
|
||||
* - < 0 if an error occurred
|
||||
*/
|
||||
static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, struct gmap *sg)
|
||||
__releases(vcpu->kvm->srcu)
|
||||
__acquires(vcpu->kvm->srcu)
|
||||
{
|
||||
|
|
@ -1153,7 +1125,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
|||
int guest_bp_isolation;
|
||||
int rc = 0;
|
||||
|
||||
handle_last_fault(vcpu, vsie_page);
|
||||
handle_last_fault(vcpu, vsie_page, sg);
|
||||
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
|
||||
|
|
@ -1191,7 +1163,7 @@ xfer_to_guest_mode_check:
|
|||
goto xfer_to_guest_mode_check;
|
||||
}
|
||||
guest_timing_enter_irqoff();
|
||||
rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce);
|
||||
rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, sg->asce.val);
|
||||
guest_timing_exit_irqoff();
|
||||
local_irq_enable();
|
||||
}
|
||||
|
|
@ -1215,7 +1187,7 @@ skip_sie:
|
|||
if (rc > 0)
|
||||
rc = 0; /* we could still have an icpt */
|
||||
else if (current->thread.gmap_int_code)
|
||||
return handle_fault(vcpu, vsie_page);
|
||||
return handle_fault(vcpu, vsie_page, sg);
|
||||
|
||||
switch (scb_s->icptcode) {
|
||||
case ICPT_INST:
|
||||
|
|
@ -1233,7 +1205,7 @@ skip_sie:
|
|||
break;
|
||||
case ICPT_PARTEXEC:
|
||||
if (scb_s->ipa == 0xb254)
|
||||
rc = vsie_handle_mvpg(vcpu, vsie_page);
|
||||
rc = vsie_handle_mvpg(vcpu, vsie_page, sg);
|
||||
break;
|
||||
}
|
||||
return rc;
|
||||
|
|
@ -1241,43 +1213,67 @@ skip_sie:
|
|||
|
||||
static void release_gmap_shadow(struct vsie_page *vsie_page)
|
||||
{
|
||||
if (vsie_page->gmap)
|
||||
gmap_put(vsie_page->gmap);
|
||||
WRITE_ONCE(vsie_page->gmap, NULL);
|
||||
struct gmap *gmap = vsie_page->gmap_cache.gmap;
|
||||
|
||||
lockdep_assert_held(&gmap->kvm->arch.gmap->children_lock);
|
||||
|
||||
list_del(&vsie_page->gmap_cache.list);
|
||||
vsie_page->gmap_cache.gmap = NULL;
|
||||
prefix_unmapped(vsie_page);
|
||||
|
||||
if (list_empty(&gmap->scb_users)) {
|
||||
gmap_remove_child(gmap);
|
||||
gmap_put(gmap);
|
||||
}
|
||||
}
|
||||
|
||||
static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
|
||||
struct vsie_page *vsie_page)
|
||||
static struct gmap *acquire_gmap_shadow(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
{
|
||||
unsigned long asce;
|
||||
union ctlreg0 cr0;
|
||||
struct gmap *gmap;
|
||||
union asce asce;
|
||||
int edat;
|
||||
|
||||
asce = vcpu->arch.sie_block->gcr[1];
|
||||
asce.val = vcpu->arch.sie_block->gcr[1];
|
||||
cr0.val = vcpu->arch.sie_block->gcr[0];
|
||||
edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
|
||||
edat += edat && test_kvm_facility(vcpu->kvm, 78);
|
||||
|
||||
/*
|
||||
* ASCE or EDAT could have changed since last icpt, or the gmap
|
||||
* we're holding has been unshadowed. If the gmap is still valid,
|
||||
* we can safely reuse it.
|
||||
*/
|
||||
if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) {
|
||||
vcpu->kvm->stat.gmap_shadow_reuse++;
|
||||
return 0;
|
||||
scoped_guard(spinlock, &vcpu->kvm->arch.gmap->children_lock) {
|
||||
gmap = vsie_page->gmap_cache.gmap;
|
||||
if (gmap) {
|
||||
/*
|
||||
* ASCE or EDAT could have changed since last icpt, or the gmap
|
||||
* we're holding has been unshadowed. If the gmap is still valid,
|
||||
* we can safely reuse it.
|
||||
*/
|
||||
if (gmap_is_shadow_valid(gmap, asce, edat)) {
|
||||
vcpu->kvm->stat.gmap_shadow_reuse++;
|
||||
gmap_get(gmap);
|
||||
return gmap;
|
||||
}
|
||||
/* release the old shadow and mark the prefix as unmapped */
|
||||
release_gmap_shadow(vsie_page);
|
||||
}
|
||||
}
|
||||
|
||||
/* release the old shadow - if any, and mark the prefix as unmapped */
|
||||
release_gmap_shadow(vsie_page);
|
||||
gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
|
||||
again:
|
||||
gmap = gmap_create_shadow(vcpu->arch.mc, vcpu->kvm->arch.gmap, asce, edat);
|
||||
if (IS_ERR(gmap))
|
||||
return PTR_ERR(gmap);
|
||||
vcpu->kvm->stat.gmap_shadow_create++;
|
||||
WRITE_ONCE(vsie_page->gmap, gmap);
|
||||
return 0;
|
||||
return gmap;
|
||||
scoped_guard(spinlock, &vcpu->kvm->arch.gmap->children_lock) {
|
||||
/* unlikely race condition, remove the previous shadow */
|
||||
if (vsie_page->gmap_cache.gmap)
|
||||
release_gmap_shadow(vsie_page);
|
||||
if (!gmap->parent) {
|
||||
gmap_put(gmap);
|
||||
goto again;
|
||||
}
|
||||
vcpu->kvm->stat.gmap_shadow_create++;
|
||||
list_add(&vsie_page->gmap_cache.list, &gmap->scb_users);
|
||||
vsie_page->gmap_cache.gmap = gmap;
|
||||
prefix_unmapped(vsie_page);
|
||||
}
|
||||
return gmap;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -1330,15 +1326,20 @@ static void unregister_shadow_scb(struct kvm_vcpu *vcpu)
|
|||
static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
{
|
||||
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
|
||||
struct gmap *sg;
|
||||
int rc = 0;
|
||||
|
||||
while (1) {
|
||||
rc = acquire_gmap_shadow(vcpu, vsie_page);
|
||||
sg = acquire_gmap_shadow(vcpu, vsie_page);
|
||||
if (IS_ERR(sg)) {
|
||||
rc = PTR_ERR(sg);
|
||||
sg = NULL;
|
||||
}
|
||||
if (!rc)
|
||||
rc = map_prefix(vcpu, vsie_page);
|
||||
rc = map_prefix(vcpu, vsie_page, sg);
|
||||
if (!rc) {
|
||||
update_intervention_requests(vsie_page);
|
||||
rc = do_vsie_run(vcpu, vsie_page);
|
||||
rc = do_vsie_run(vcpu, vsie_page, sg);
|
||||
}
|
||||
atomic_andnot(PROG_BLOCK_SIE, &scb_s->prog20);
|
||||
|
||||
|
|
@ -1361,6 +1362,9 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
|||
kvm_s390_rewind_psw(vcpu, 4);
|
||||
break;
|
||||
}
|
||||
if (sg)
|
||||
sg = gmap_put(sg);
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
if (rc == -EFAULT) {
|
||||
|
|
@ -1457,8 +1461,7 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
|
|||
vsie_page->scb_gpa = ULONG_MAX;
|
||||
|
||||
/* Double use of the same address or allocation failure. */
|
||||
if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9,
|
||||
vsie_page)) {
|
||||
if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, vsie_page)) {
|
||||
put_vsie_page(vsie_page);
|
||||
mutex_unlock(&kvm->arch.vsie.mutex);
|
||||
return NULL;
|
||||
|
|
@ -1467,7 +1470,12 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
|
|||
mutex_unlock(&kvm->arch.vsie.mutex);
|
||||
|
||||
memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
|
||||
release_gmap_shadow(vsie_page);
|
||||
if (vsie_page->gmap_cache.gmap) {
|
||||
scoped_guard(spinlock, &kvm->arch.gmap->children_lock)
|
||||
if (vsie_page->gmap_cache.gmap)
|
||||
release_gmap_shadow(vsie_page);
|
||||
}
|
||||
prefix_unmapped(vsie_page);
|
||||
vsie_page->fault_addr = 0;
|
||||
vsie_page->scb_s.ihcpu = 0xffffU;
|
||||
return vsie_page;
|
||||
|
|
@ -1498,11 +1506,13 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
|
||||
vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
|
||||
if (IS_ERR(vsie_page))
|
||||
if (IS_ERR(vsie_page)) {
|
||||
return PTR_ERR(vsie_page);
|
||||
else if (!vsie_page)
|
||||
} else if (!vsie_page) {
|
||||
/* double use of sie control block - simply do nothing */
|
||||
kvm_s390_rewind_psw(vcpu, 4);
|
||||
return 0;
|
||||
}
|
||||
|
||||
rc = pin_scb(vcpu, vsie_page, scb_addr);
|
||||
if (rc)
|
||||
|
|
@ -1543,8 +1553,10 @@ void kvm_s390_vsie_destroy(struct kvm *kvm)
|
|||
mutex_lock(&kvm->arch.vsie.mutex);
|
||||
for (i = 0; i < kvm->arch.vsie.page_count; i++) {
|
||||
vsie_page = kvm->arch.vsie.pages[i];
|
||||
scoped_guard(spinlock, &kvm->arch.gmap->children_lock)
|
||||
if (vsie_page->gmap_cache.gmap)
|
||||
release_gmap_shadow(vsie_page);
|
||||
kvm->arch.vsie.pages[i] = NULL;
|
||||
release_gmap_shadow(vsie_page);
|
||||
/* free the radix tree entry */
|
||||
if (vsie_page->scb_gpa != ULONG_MAX)
|
||||
radix_tree_delete(&kvm->arch.vsie.addr_to_page,
|
||||
|
|
|
|||
|
|
@ -34,136 +34,19 @@ void debug_user_asce(int exit)
|
|||
}
|
||||
#endif /*CONFIG_DEBUG_ENTRY */
|
||||
|
||||
union oac {
|
||||
unsigned int val;
|
||||
struct {
|
||||
struct {
|
||||
unsigned short key : 4;
|
||||
unsigned short : 4;
|
||||
unsigned short as : 2;
|
||||
unsigned short : 4;
|
||||
unsigned short k : 1;
|
||||
unsigned short a : 1;
|
||||
} oac1;
|
||||
struct {
|
||||
unsigned short key : 4;
|
||||
unsigned short : 4;
|
||||
unsigned short as : 2;
|
||||
unsigned short : 4;
|
||||
unsigned short k : 1;
|
||||
unsigned short a : 1;
|
||||
} oac2;
|
||||
};
|
||||
};
|
||||
|
||||
static uaccess_kmsan_or_inline __must_check unsigned long
|
||||
raw_copy_from_user_key(void *to, const void __user *from, unsigned long size, unsigned long key)
|
||||
{
|
||||
unsigned long osize;
|
||||
union oac spec = {
|
||||
.oac2.key = key,
|
||||
.oac2.as = PSW_BITS_AS_SECONDARY,
|
||||
.oac2.k = 1,
|
||||
.oac2.a = 1,
|
||||
};
|
||||
int cc;
|
||||
|
||||
while (1) {
|
||||
osize = size;
|
||||
asm_inline volatile(
|
||||
" lr %%r0,%[spec]\n"
|
||||
"0: mvcos %[to],%[from],%[size]\n"
|
||||
"1: nopr %%r7\n"
|
||||
CC_IPM(cc)
|
||||
EX_TABLE_UA_MVCOS_FROM(0b, 0b)
|
||||
EX_TABLE_UA_MVCOS_FROM(1b, 0b)
|
||||
: CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char *)to)
|
||||
: [spec] "d" (spec.val), [from] "Q" (*(const char __user *)from)
|
||||
: CC_CLOBBER_LIST("memory", "0"));
|
||||
if (CC_TRANSFORM(cc) == 0)
|
||||
return osize - size;
|
||||
size -= 4096;
|
||||
to += 4096;
|
||||
from += 4096;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned long _copy_from_user_key(void *to, const void __user *from,
|
||||
unsigned long n, unsigned long key)
|
||||
{
|
||||
unsigned long res = n;
|
||||
|
||||
might_fault();
|
||||
if (!should_fail_usercopy()) {
|
||||
instrument_copy_from_user_before(to, from, n);
|
||||
res = raw_copy_from_user_key(to, from, n, key);
|
||||
instrument_copy_from_user_after(to, from, n, res);
|
||||
}
|
||||
if (unlikely(res))
|
||||
memset(to + (n - res), 0, res);
|
||||
return res;
|
||||
}
|
||||
EXPORT_SYMBOL(_copy_from_user_key);
|
||||
|
||||
static uaccess_kmsan_or_inline __must_check unsigned long
|
||||
raw_copy_to_user_key(void __user *to, const void *from, unsigned long size, unsigned long key)
|
||||
{
|
||||
unsigned long osize;
|
||||
union oac spec = {
|
||||
.oac1.key = key,
|
||||
.oac1.as = PSW_BITS_AS_SECONDARY,
|
||||
.oac1.k = 1,
|
||||
.oac1.a = 1,
|
||||
};
|
||||
int cc;
|
||||
|
||||
while (1) {
|
||||
osize = size;
|
||||
asm_inline volatile(
|
||||
" lr %%r0,%[spec]\n"
|
||||
"0: mvcos %[to],%[from],%[size]\n"
|
||||
"1: nopr %%r7\n"
|
||||
CC_IPM(cc)
|
||||
EX_TABLE_UA_MVCOS_TO(0b, 0b)
|
||||
EX_TABLE_UA_MVCOS_TO(1b, 0b)
|
||||
: CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to)
|
||||
: [spec] "d" (spec.val), [from] "Q" (*(const char *)from)
|
||||
: CC_CLOBBER_LIST("memory", "0"));
|
||||
if (CC_TRANSFORM(cc) == 0)
|
||||
return osize - size;
|
||||
size -= 4096;
|
||||
to += 4096;
|
||||
from += 4096;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned long _copy_to_user_key(void __user *to, const void *from,
|
||||
unsigned long n, unsigned long key)
|
||||
{
|
||||
might_fault();
|
||||
if (should_fail_usercopy())
|
||||
return n;
|
||||
instrument_copy_to_user(to, from, n);
|
||||
return raw_copy_to_user_key(to, from, n, key);
|
||||
}
|
||||
EXPORT_SYMBOL(_copy_to_user_key);
|
||||
|
||||
#define CMPXCHG_USER_KEY_MAX_LOOPS 128
|
||||
|
||||
static nokprobe_inline int __cmpxchg_user_key_small(unsigned long address, unsigned int *uval,
|
||||
unsigned int old, unsigned int new,
|
||||
unsigned int mask, unsigned long key)
|
||||
static nokprobe_inline int __cmpxchg_key_small(void *address, unsigned int *uval,
|
||||
unsigned int old, unsigned int new,
|
||||
unsigned int mask, unsigned long key)
|
||||
{
|
||||
unsigned long count;
|
||||
unsigned int prev;
|
||||
bool sacf_flag;
|
||||
int rc = 0;
|
||||
|
||||
skey_regions_initialize();
|
||||
sacf_flag = enable_sacf_uaccess();
|
||||
asm_inline volatile(
|
||||
"20: spka 0(%[key])\n"
|
||||
" sacf 256\n"
|
||||
" llill %[count],%[max_loops]\n"
|
||||
"0: l %[prev],%[address]\n"
|
||||
"1: nr %[prev],%[mask]\n"
|
||||
|
|
@ -178,8 +61,7 @@ static nokprobe_inline int __cmpxchg_user_key_small(unsigned long address, unsig
|
|||
" nr %[tmp],%[mask]\n"
|
||||
" jnz 5f\n"
|
||||
" brct %[count],2b\n"
|
||||
"5: sacf 768\n"
|
||||
" spka %[default_key]\n"
|
||||
"5: spka %[default_key]\n"
|
||||
"21:\n"
|
||||
EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
|
||||
EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
|
||||
|
|
@ -197,16 +79,16 @@ static nokprobe_inline int __cmpxchg_user_key_small(unsigned long address, unsig
|
|||
[default_key] "J" (PAGE_DEFAULT_KEY),
|
||||
[max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
|
||||
: "memory", "cc");
|
||||
disable_sacf_uaccess(sacf_flag);
|
||||
*uval = prev;
|
||||
if (!count)
|
||||
rc = -EAGAIN;
|
||||
return rc;
|
||||
}
|
||||
|
||||
int __kprobes __cmpxchg_user_key1(unsigned long address, unsigned char *uval,
|
||||
unsigned char old, unsigned char new, unsigned long key)
|
||||
int __kprobes __cmpxchg_key1(void *addr, unsigned char *uval, unsigned char old,
|
||||
unsigned char new, unsigned long key)
|
||||
{
|
||||
unsigned long address = (unsigned long)addr;
|
||||
unsigned int prev, shift, mask, _old, _new;
|
||||
int rc;
|
||||
|
||||
|
|
@ -215,15 +97,16 @@ int __kprobes __cmpxchg_user_key1(unsigned long address, unsigned char *uval,
|
|||
_old = (unsigned int)old << shift;
|
||||
_new = (unsigned int)new << shift;
|
||||
mask = ~(0xff << shift);
|
||||
rc = __cmpxchg_user_key_small(address, &prev, _old, _new, mask, key);
|
||||
rc = __cmpxchg_key_small((void *)address, &prev, _old, _new, mask, key);
|
||||
*uval = prev >> shift;
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL(__cmpxchg_user_key1);
|
||||
EXPORT_SYMBOL(__cmpxchg_key1);
|
||||
|
||||
int __kprobes __cmpxchg_user_key2(unsigned long address, unsigned short *uval,
|
||||
unsigned short old, unsigned short new, unsigned long key)
|
||||
int __kprobes __cmpxchg_key2(void *addr, unsigned short *uval, unsigned short old,
|
||||
unsigned short new, unsigned long key)
|
||||
{
|
||||
unsigned long address = (unsigned long)addr;
|
||||
unsigned int prev, shift, mask, _old, _new;
|
||||
int rc;
|
||||
|
||||
|
|
@ -232,27 +115,23 @@ int __kprobes __cmpxchg_user_key2(unsigned long address, unsigned short *uval,
|
|||
_old = (unsigned int)old << shift;
|
||||
_new = (unsigned int)new << shift;
|
||||
mask = ~(0xffff << shift);
|
||||
rc = __cmpxchg_user_key_small(address, &prev, _old, _new, mask, key);
|
||||
rc = __cmpxchg_key_small((void *)address, &prev, _old, _new, mask, key);
|
||||
*uval = prev >> shift;
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL(__cmpxchg_user_key2);
|
||||
EXPORT_SYMBOL(__cmpxchg_key2);
|
||||
|
||||
int __kprobes __cmpxchg_user_key4(unsigned long address, unsigned int *uval,
|
||||
unsigned int old, unsigned int new, unsigned long key)
|
||||
int __kprobes __cmpxchg_key4(void *address, unsigned int *uval, unsigned int old,
|
||||
unsigned int new, unsigned long key)
|
||||
{
|
||||
unsigned int prev = old;
|
||||
bool sacf_flag;
|
||||
int rc = 0;
|
||||
|
||||
skey_regions_initialize();
|
||||
sacf_flag = enable_sacf_uaccess();
|
||||
asm_inline volatile(
|
||||
"20: spka 0(%[key])\n"
|
||||
" sacf 256\n"
|
||||
"0: cs %[prev],%[new],%[address]\n"
|
||||
"1: sacf 768\n"
|
||||
" spka %[default_key]\n"
|
||||
"1: spka %[default_key]\n"
|
||||
"21:\n"
|
||||
EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
|
||||
EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
|
||||
|
|
@ -264,27 +143,22 @@ int __kprobes __cmpxchg_user_key4(unsigned long address, unsigned int *uval,
|
|||
[key] "a" (key << 4),
|
||||
[default_key] "J" (PAGE_DEFAULT_KEY)
|
||||
: "memory", "cc");
|
||||
disable_sacf_uaccess(sacf_flag);
|
||||
*uval = prev;
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL(__cmpxchg_user_key4);
|
||||
EXPORT_SYMBOL(__cmpxchg_key4);
|
||||
|
||||
int __kprobes __cmpxchg_user_key8(unsigned long address, unsigned long *uval,
|
||||
unsigned long old, unsigned long new, unsigned long key)
|
||||
int __kprobes __cmpxchg_key8(void *address, unsigned long *uval, unsigned long old,
|
||||
unsigned long new, unsigned long key)
|
||||
{
|
||||
unsigned long prev = old;
|
||||
bool sacf_flag;
|
||||
int rc = 0;
|
||||
|
||||
skey_regions_initialize();
|
||||
sacf_flag = enable_sacf_uaccess();
|
||||
asm_inline volatile(
|
||||
"20: spka 0(%[key])\n"
|
||||
" sacf 256\n"
|
||||
"0: csg %[prev],%[new],%[address]\n"
|
||||
"1: sacf 768\n"
|
||||
" spka %[default_key]\n"
|
||||
"1: spka %[default_key]\n"
|
||||
"21:\n"
|
||||
EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
|
||||
EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
|
||||
|
|
@ -296,27 +170,22 @@ int __kprobes __cmpxchg_user_key8(unsigned long address, unsigned long *uval,
|
|||
[key] "a" (key << 4),
|
||||
[default_key] "J" (PAGE_DEFAULT_KEY)
|
||||
: "memory", "cc");
|
||||
disable_sacf_uaccess(sacf_flag);
|
||||
*uval = prev;
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL(__cmpxchg_user_key8);
|
||||
EXPORT_SYMBOL(__cmpxchg_key8);
|
||||
|
||||
int __kprobes __cmpxchg_user_key16(unsigned long address, __uint128_t *uval,
|
||||
__uint128_t old, __uint128_t new, unsigned long key)
|
||||
int __kprobes __cmpxchg_key16(void *address, __uint128_t *uval, __uint128_t old,
|
||||
__uint128_t new, unsigned long key)
|
||||
{
|
||||
__uint128_t prev = old;
|
||||
bool sacf_flag;
|
||||
int rc = 0;
|
||||
|
||||
skey_regions_initialize();
|
||||
sacf_flag = enable_sacf_uaccess();
|
||||
asm_inline volatile(
|
||||
"20: spka 0(%[key])\n"
|
||||
" sacf 256\n"
|
||||
"0: cdsg %[prev],%[new],%[address]\n"
|
||||
"1: sacf 768\n"
|
||||
" spka %[default_key]\n"
|
||||
"1: spka %[default_key]\n"
|
||||
"21:\n"
|
||||
EX_TABLE_UA_LOAD_REGPAIR(0b, 1b, %[rc], %[prev])
|
||||
EX_TABLE_UA_LOAD_REGPAIR(1b, 1b, %[rc], %[prev])
|
||||
|
|
@ -328,8 +197,7 @@ int __kprobes __cmpxchg_user_key16(unsigned long address, __uint128_t *uval,
|
|||
[key] "a" (key << 4),
|
||||
[default_key] "J" (PAGE_DEFAULT_KEY)
|
||||
: "memory", "cc");
|
||||
disable_sacf_uaccess(sacf_flag);
|
||||
*uval = prev;
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL(__cmpxchg_user_key16);
|
||||
EXPORT_SYMBOL(__cmpxchg_key16);
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ obj-$(CONFIG_CMM) += cmm.o
|
|||
obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
|
||||
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
|
||||
obj-$(CONFIG_PTDUMP) += dump_pagetables.o
|
||||
obj-$(CONFIG_PGSTE) += gmap.o
|
||||
obj-$(CONFIG_PFAULT) += pfault.o
|
||||
|
||||
obj-$(subst m,y,$(CONFIG_KVM)) += gmap_helpers.o
|
||||
|
|
|
|||
|
|
@ -403,7 +403,7 @@ void do_dat_exception(struct pt_regs *regs)
|
|||
}
|
||||
NOKPROBE_SYMBOL(do_dat_exception);
|
||||
|
||||
#if IS_ENABLED(CONFIG_PGSTE)
|
||||
#if IS_ENABLED(CONFIG_KVM)
|
||||
|
||||
void do_secure_storage_access(struct pt_regs *regs)
|
||||
{
|
||||
|
|
@ -470,4 +470,4 @@ void do_secure_storage_access(struct pt_regs *regs)
|
|||
}
|
||||
NOKPROBE_SYMBOL(do_secure_storage_access);
|
||||
|
||||
#endif /* CONFIG_PGSTE */
|
||||
#endif /* CONFIG_KVM */
|
||||
|
|
|
|||
2436
arch/s390/mm/gmap.c
2436
arch/s390/mm/gmap.c
File diff suppressed because it is too large
Load diff
|
|
@ -15,7 +15,6 @@
|
|||
#include <linux/pagewalk.h>
|
||||
#include <linux/ksm.h>
|
||||
#include <asm/gmap_helpers.h>
|
||||
#include <asm/pgtable.h>
|
||||
|
||||
/**
|
||||
* ptep_zap_softleaf_entry() - discard a software leaf entry.
|
||||
|
|
@ -47,9 +46,7 @@ static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry)
|
|||
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long pgstev;
|
||||
spinlock_t *ptl;
|
||||
pgste_t pgste;
|
||||
pte_t *ptep;
|
||||
|
||||
mmap_assert_locked(mm);
|
||||
|
|
@ -64,18 +61,8 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
|
|||
if (unlikely(!ptep))
|
||||
return;
|
||||
if (pte_swap(*ptep)) {
|
||||
preempt_disable();
|
||||
pgste = pgste_get_lock(ptep);
|
||||
pgstev = pgste_val(pgste);
|
||||
|
||||
if ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
|
||||
(pgstev & _PGSTE_GPS_ZERO)) {
|
||||
ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep));
|
||||
pte_clear(mm, vmaddr, ptep);
|
||||
}
|
||||
|
||||
pgste_set_unlock(ptep, pgste);
|
||||
preempt_enable();
|
||||
ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep));
|
||||
pte_clear(mm, vmaddr, ptep);
|
||||
}
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
}
|
||||
|
|
@ -108,6 +95,85 @@ void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned lo
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(gmap_helper_discard);
|
||||
|
||||
/**
|
||||
* gmap_helper_try_set_pte_unused() - mark a pte entry as unused
|
||||
* @mm: the mm
|
||||
* @vmaddr: the userspace address whose pte is to be marked
|
||||
*
|
||||
* Mark the pte corresponding the given address as unused. This will cause
|
||||
* core mm code to just drop this page instead of swapping it.
|
||||
*
|
||||
* This function needs to be called with interrupts disabled (for example
|
||||
* while holding a spinlock), or while holding the mmap lock. Normally this
|
||||
* function is called as a result of an unmap operation, and thus KVM common
|
||||
* code will already hold kvm->mmu_lock in write mode.
|
||||
*
|
||||
* Context: Needs to be called while holding the mmap lock or with interrupts
|
||||
* disabled.
|
||||
*/
|
||||
void gmap_helper_try_set_pte_unused(struct mm_struct *mm, unsigned long vmaddr)
|
||||
{
|
||||
pmd_t *pmdp, pmd, pmdval;
|
||||
pud_t *pudp, pud;
|
||||
p4d_t *p4dp, p4d;
|
||||
pgd_t *pgdp, pgd;
|
||||
spinlock_t *ptl; /* Lock for the host (userspace) page table */
|
||||
pte_t *ptep;
|
||||
|
||||
pgdp = pgd_offset(mm, vmaddr);
|
||||
pgd = pgdp_get(pgdp);
|
||||
if (pgd_none(pgd) || !pgd_present(pgd))
|
||||
return;
|
||||
|
||||
p4dp = p4d_offset(pgdp, vmaddr);
|
||||
p4d = p4dp_get(p4dp);
|
||||
if (p4d_none(p4d) || !p4d_present(p4d))
|
||||
return;
|
||||
|
||||
pudp = pud_offset(p4dp, vmaddr);
|
||||
pud = pudp_get(pudp);
|
||||
if (pud_none(pud) || pud_leaf(pud) || !pud_present(pud))
|
||||
return;
|
||||
|
||||
pmdp = pmd_offset(pudp, vmaddr);
|
||||
pmd = pmdp_get_lockless(pmdp);
|
||||
if (pmd_none(pmd) || pmd_leaf(pmd) || !pmd_present(pmd))
|
||||
return;
|
||||
|
||||
ptep = pte_offset_map_rw_nolock(mm, pmdp, vmaddr, &pmdval, &ptl);
|
||||
if (!ptep)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Several paths exists that takes the ptl lock and then call the
|
||||
* mmu_notifier, which takes the mmu_lock. The unmap path, instead,
|
||||
* takes the mmu_lock in write mode first, and then potentially
|
||||
* calls this function, which takes the ptl lock. This can lead to a
|
||||
* deadlock.
|
||||
* The unused page mechanism is only an optimization, if the
|
||||
* _PAGE_UNUSED bit is not set, the unused page is swapped as normal
|
||||
* instead of being discarded.
|
||||
* If the lock is contended the bit is not set and the deadlock is
|
||||
* avoided.
|
||||
*/
|
||||
if (spin_trylock(ptl)) {
|
||||
/*
|
||||
* Make sure the pte we are touching is still the correct
|
||||
* one. In theory this check should not be needed, but
|
||||
* better safe than sorry.
|
||||
* Disabling interrupts or holding the mmap lock is enough to
|
||||
* guarantee that no concurrent updates to the page tables
|
||||
* are possible.
|
||||
*/
|
||||
if (likely(pmd_same(pmdval, pmdp_get_lockless(pmdp))))
|
||||
__atomic64_or(_PAGE_UNUSED, (long *)ptep);
|
||||
spin_unlock(ptl);
|
||||
}
|
||||
|
||||
pte_unmap(ptep);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gmap_helper_try_set_pte_unused);
|
||||
|
||||
static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
|
||||
unsigned long end, struct mm_walk *walk)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -135,29 +135,6 @@ static inline pte_t __rste_to_pte(unsigned long rste)
|
|||
return __pte(pteval);
|
||||
}
|
||||
|
||||
static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
|
||||
{
|
||||
struct folio *folio;
|
||||
unsigned long size, paddr;
|
||||
|
||||
if (!mm_uses_skeys(mm) ||
|
||||
rste & _SEGMENT_ENTRY_INVALID)
|
||||
return;
|
||||
|
||||
if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
|
||||
folio = page_folio(pud_page(__pud(rste)));
|
||||
size = PUD_SIZE;
|
||||
paddr = rste & PUD_MASK;
|
||||
} else {
|
||||
folio = page_folio(pmd_page(__pmd(rste)));
|
||||
size = PMD_SIZE;
|
||||
paddr = rste & PMD_MASK;
|
||||
}
|
||||
|
||||
if (!test_and_set_bit(PG_arch_1, &folio->flags.f))
|
||||
__storage_key_init_range(paddr, paddr + size);
|
||||
}
|
||||
|
||||
void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, pte_t pte)
|
||||
{
|
||||
|
|
@ -173,7 +150,6 @@ void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
|
|||
} else if (likely(pte_present(pte)))
|
||||
rste |= _SEGMENT_ENTRY_LARGE;
|
||||
|
||||
clear_huge_pte_skeys(mm, rste);
|
||||
set_pte(ptep, __pte(rste));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@
|
|||
#include <asm/page.h>
|
||||
|
||||
int __bootdata_preserved(cmma_flag);
|
||||
EXPORT_SYMBOL(cmma_flag);
|
||||
|
||||
void arch_free_page(struct page *page, int order)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -16,13 +16,6 @@
|
|||
#include <asm/asm.h>
|
||||
#include <asm/set_memory.h>
|
||||
|
||||
static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
|
||||
{
|
||||
asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0"
|
||||
: [addr] "+a" (addr) : [skey] "d" (skey));
|
||||
return addr;
|
||||
}
|
||||
|
||||
void __storage_key_init_range(unsigned long start, unsigned long end)
|
||||
{
|
||||
unsigned long boundary, size;
|
||||
|
|
|
|||
|
|
@ -114,30 +114,6 @@ err_p4d:
|
|||
return -ENOMEM;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PGSTE
|
||||
|
||||
struct ptdesc *page_table_alloc_pgste_noprof(struct mm_struct *mm)
|
||||
{
|
||||
struct ptdesc *ptdesc;
|
||||
u64 *table;
|
||||
|
||||
ptdesc = pagetable_alloc_noprof(GFP_KERNEL_ACCOUNT, 0);
|
||||
if (ptdesc) {
|
||||
table = (u64 *)ptdesc_address(ptdesc);
|
||||
__arch_set_page_dat(table, 1);
|
||||
memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
|
||||
memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
|
||||
}
|
||||
return ptdesc;
|
||||
}
|
||||
|
||||
void page_table_free_pgste(struct ptdesc *ptdesc)
|
||||
{
|
||||
pagetable_free(ptdesc);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_PGSTE */
|
||||
|
||||
unsigned long *page_table_alloc_noprof(struct mm_struct *mm)
|
||||
{
|
||||
gfp_t gfp = GFP_KERNEL_ACCOUNT;
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@
|
|||
#include <asm/tlbflush.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/page-states.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/machine.h>
|
||||
|
||||
pgprot_t pgprot_writecombine(pgprot_t prot)
|
||||
|
|
@ -116,149 +115,14 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
|
|||
return old;
|
||||
}
|
||||
|
||||
static inline pgste_t pgste_get(pte_t *ptep)
|
||||
{
|
||||
unsigned long pgste = 0;
|
||||
#ifdef CONFIG_PGSTE
|
||||
pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
|
||||
#endif
|
||||
return __pgste(pgste);
|
||||
}
|
||||
|
||||
static inline void pgste_set(pte_t *ptep, pgste_t pgste)
|
||||
{
|
||||
#ifdef CONFIG_PGSTE
|
||||
*(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
|
||||
struct mm_struct *mm)
|
||||
{
|
||||
#ifdef CONFIG_PGSTE
|
||||
unsigned long address, bits, skey;
|
||||
|
||||
if (!mm_uses_skeys(mm) || pte_val(pte) & _PAGE_INVALID)
|
||||
return pgste;
|
||||
address = pte_val(pte) & PAGE_MASK;
|
||||
skey = (unsigned long) page_get_storage_key(address);
|
||||
bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
|
||||
/* Transfer page changed & referenced bit to guest bits in pgste */
|
||||
pgste = set_pgste_bit(pgste, bits << 48); /* GR bit & GC bit */
|
||||
/* Copy page access key and fetch protection bit to pgste */
|
||||
pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT);
|
||||
pgste = set_pgste_bit(pgste, (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56);
|
||||
#endif
|
||||
return pgste;
|
||||
|
||||
}
|
||||
|
||||
static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
|
||||
struct mm_struct *mm)
|
||||
{
|
||||
#ifdef CONFIG_PGSTE
|
||||
unsigned long address;
|
||||
unsigned long nkey;
|
||||
|
||||
if (!mm_uses_skeys(mm) || pte_val(entry) & _PAGE_INVALID)
|
||||
return;
|
||||
VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
|
||||
address = pte_val(entry) & PAGE_MASK;
|
||||
/*
|
||||
* Set page access key and fetch protection bit from pgste.
|
||||
* The guest C/R information is still in the PGSTE, set real
|
||||
* key C/R to 0.
|
||||
*/
|
||||
nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
|
||||
nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
|
||||
page_set_storage_key(address, nkey, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
|
||||
{
|
||||
#ifdef CONFIG_PGSTE
|
||||
if ((pte_val(entry) & _PAGE_PRESENT) &&
|
||||
(pte_val(entry) & _PAGE_WRITE) &&
|
||||
!(pte_val(entry) & _PAGE_INVALID)) {
|
||||
if (!machine_has_esop()) {
|
||||
/*
|
||||
* Without enhanced suppression-on-protection force
|
||||
* the dirty bit on for all writable ptes.
|
||||
*/
|
||||
entry = set_pte_bit(entry, __pgprot(_PAGE_DIRTY));
|
||||
entry = clear_pte_bit(entry, __pgprot(_PAGE_PROTECT));
|
||||
}
|
||||
if (!(pte_val(entry) & _PAGE_PROTECT))
|
||||
/* This pte allows write access, set user-dirty */
|
||||
pgste = set_pgste_bit(pgste, PGSTE_UC_BIT);
|
||||
}
|
||||
#endif
|
||||
set_pte(ptep, entry);
|
||||
return pgste;
|
||||
}
|
||||
|
||||
static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
|
||||
unsigned long addr,
|
||||
pte_t *ptep, pgste_t pgste)
|
||||
{
|
||||
#ifdef CONFIG_PGSTE
|
||||
unsigned long bits;
|
||||
|
||||
bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
|
||||
if (bits) {
|
||||
pgste = __pgste(pgste_val(pgste) ^ bits);
|
||||
ptep_notify(mm, addr, ptep, bits);
|
||||
}
|
||||
#endif
|
||||
return pgste;
|
||||
}
|
||||
|
||||
static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
|
||||
unsigned long addr, pte_t *ptep)
|
||||
{
|
||||
pgste_t pgste = __pgste(0);
|
||||
|
||||
if (mm_has_pgste(mm)) {
|
||||
pgste = pgste_get_lock(ptep);
|
||||
pgste = pgste_pte_notify(mm, addr, ptep, pgste);
|
||||
}
|
||||
return pgste;
|
||||
}
|
||||
|
||||
static inline pte_t ptep_xchg_commit(struct mm_struct *mm,
|
||||
unsigned long addr, pte_t *ptep,
|
||||
pgste_t pgste, pte_t old, pte_t new)
|
||||
{
|
||||
if (mm_has_pgste(mm)) {
|
||||
if (pte_val(old) & _PAGE_INVALID)
|
||||
pgste_set_key(ptep, pgste, new, mm);
|
||||
if (pte_val(new) & _PAGE_INVALID) {
|
||||
pgste = pgste_update_all(old, pgste, mm);
|
||||
if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
|
||||
_PGSTE_GPS_USAGE_UNUSED)
|
||||
old = set_pte_bit(old, __pgprot(_PAGE_UNUSED));
|
||||
}
|
||||
pgste = pgste_set_pte(ptep, pgste, new);
|
||||
pgste_set_unlock(ptep, pgste);
|
||||
} else {
|
||||
set_pte(ptep, new);
|
||||
}
|
||||
return old;
|
||||
}
|
||||
|
||||
pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, pte_t new)
|
||||
{
|
||||
pgste_t pgste;
|
||||
pte_t old;
|
||||
int nodat;
|
||||
|
||||
preempt_disable();
|
||||
pgste = ptep_xchg_start(mm, addr, ptep);
|
||||
nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
|
||||
old = ptep_flush_direct(mm, addr, ptep, nodat);
|
||||
old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
|
||||
old = ptep_flush_direct(mm, addr, ptep, 1);
|
||||
set_pte(ptep, new);
|
||||
preempt_enable();
|
||||
return old;
|
||||
}
|
||||
|
|
@ -292,15 +156,11 @@ EXPORT_SYMBOL(ptep_reset_dat_prot);
|
|||
pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, pte_t new)
|
||||
{
|
||||
pgste_t pgste;
|
||||
pte_t old;
|
||||
int nodat;
|
||||
|
||||
preempt_disable();
|
||||
pgste = ptep_xchg_start(mm, addr, ptep);
|
||||
nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
|
||||
old = ptep_flush_lazy(mm, addr, ptep, nodat);
|
||||
old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
|
||||
old = ptep_flush_lazy(mm, addr, ptep, 1);
|
||||
set_pte(ptep, new);
|
||||
preempt_enable();
|
||||
return old;
|
||||
}
|
||||
|
|
@ -309,47 +169,22 @@ EXPORT_SYMBOL(ptep_xchg_lazy);
|
|||
pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
|
||||
pte_t *ptep)
|
||||
{
|
||||
pgste_t pgste;
|
||||
pte_t old;
|
||||
int nodat;
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
|
||||
pgste = ptep_xchg_start(mm, addr, ptep);
|
||||
nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
|
||||
old = ptep_flush_lazy(mm, addr, ptep, nodat);
|
||||
if (mm_has_pgste(mm)) {
|
||||
pgste = pgste_update_all(old, pgste, mm);
|
||||
pgste_set(ptep, pgste);
|
||||
}
|
||||
return old;
|
||||
return ptep_flush_lazy(vma->vm_mm, addr, ptep, 1);
|
||||
}
|
||||
|
||||
void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
|
||||
pte_t *ptep, pte_t old_pte, pte_t pte)
|
||||
{
|
||||
pgste_t pgste;
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
|
||||
if (mm_has_pgste(mm)) {
|
||||
pgste = pgste_get(ptep);
|
||||
pgste_set_key(ptep, pgste, pte, mm);
|
||||
pgste = pgste_set_pte(ptep, pgste, pte);
|
||||
pgste_set_unlock(ptep, pgste);
|
||||
} else {
|
||||
set_pte(ptep, pte);
|
||||
}
|
||||
set_pte(ptep, pte);
|
||||
}
|
||||
|
||||
static inline void pmdp_idte_local(struct mm_struct *mm,
|
||||
unsigned long addr, pmd_t *pmdp)
|
||||
{
|
||||
if (machine_has_tlb_guest())
|
||||
__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
|
||||
mm->context.asce, IDTE_LOCAL);
|
||||
__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_LOCAL);
|
||||
else
|
||||
__pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
|
||||
if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
|
||||
gmap_pmdp_idte_local(mm, addr);
|
||||
}
|
||||
|
||||
static inline void pmdp_idte_global(struct mm_struct *mm,
|
||||
|
|
@ -358,12 +193,8 @@ static inline void pmdp_idte_global(struct mm_struct *mm,
|
|||
if (machine_has_tlb_guest()) {
|
||||
__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
|
||||
mm->context.asce, IDTE_GLOBAL);
|
||||
if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
|
||||
gmap_pmdp_idte_global(mm, addr);
|
||||
} else {
|
||||
__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
|
||||
if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
|
||||
gmap_pmdp_idte_global(mm, addr);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -398,8 +229,6 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
|
|||
cpumask_of(smp_processor_id()))) {
|
||||
set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_INVALID)));
|
||||
mm->context.flush_mm = 1;
|
||||
if (mm_has_pgste(mm))
|
||||
gmap_pmdp_invalidate(mm, addr);
|
||||
} else {
|
||||
pmdp_idte_global(mm, addr, pmdp);
|
||||
}
|
||||
|
|
@ -407,40 +236,6 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
|
|||
return old;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PGSTE
|
||||
static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
|
||||
/* We need a valid VMA, otherwise this is clearly a fault. */
|
||||
vma = vma_lookup(mm, addr);
|
||||
if (!vma)
|
||||
return -EFAULT;
|
||||
|
||||
pgd = pgd_offset(mm, addr);
|
||||
if (!pgd_present(*pgd))
|
||||
return -ENOENT;
|
||||
|
||||
p4d = p4d_offset(pgd, addr);
|
||||
if (!p4d_present(*p4d))
|
||||
return -ENOENT;
|
||||
|
||||
pud = pud_offset(p4d, addr);
|
||||
if (!pud_present(*pud))
|
||||
return -ENOENT;
|
||||
|
||||
/* Large PUDs are not supported yet. */
|
||||
if (pud_leaf(*pud))
|
||||
return -EFAULT;
|
||||
|
||||
*pmdp = pmd_offset(pud, addr);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
|
||||
pmd_t *pmdp, pmd_t new)
|
||||
{
|
||||
|
|
@ -558,598 +353,3 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
|
|||
return pgtable;
|
||||
}
|
||||
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
||||
|
||||
#ifdef CONFIG_PGSTE
|
||||
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, pte_t entry)
|
||||
{
|
||||
pgste_t pgste;
|
||||
|
||||
/* the mm_has_pgste() check is done in set_pte_at() */
|
||||
preempt_disable();
|
||||
pgste = pgste_get_lock(ptep);
|
||||
pgste = clear_pgste_bit(pgste, _PGSTE_GPS_ZERO);
|
||||
pgste_set_key(ptep, pgste, entry, mm);
|
||||
pgste = pgste_set_pte(ptep, pgste, entry);
|
||||
pgste_set_unlock(ptep, pgste);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
|
||||
{
|
||||
pgste_t pgste;
|
||||
|
||||
preempt_disable();
|
||||
pgste = pgste_get_lock(ptep);
|
||||
pgste = set_pgste_bit(pgste, PGSTE_IN_BIT);
|
||||
pgste_set_unlock(ptep, pgste);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/**
|
||||
* ptep_force_prot - change access rights of a locked pte
|
||||
* @mm: pointer to the process mm_struct
|
||||
* @addr: virtual address in the guest address space
|
||||
* @ptep: pointer to the page table entry
|
||||
* @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
|
||||
* @bit: pgste bit to set (e.g. for notification)
|
||||
*
|
||||
* Returns 0 if the access rights were changed and -EAGAIN if the current
|
||||
* and requested access rights are incompatible.
|
||||
*/
|
||||
int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, int prot, unsigned long bit)
|
||||
{
|
||||
pte_t entry;
|
||||
pgste_t pgste;
|
||||
int pte_i, pte_p, nodat;
|
||||
|
||||
pgste = pgste_get_lock(ptep);
|
||||
entry = *ptep;
|
||||
/* Check pte entry after all locks have been acquired */
|
||||
pte_i = pte_val(entry) & _PAGE_INVALID;
|
||||
pte_p = pte_val(entry) & _PAGE_PROTECT;
|
||||
if ((pte_i && (prot != PROT_NONE)) ||
|
||||
(pte_p && (prot & PROT_WRITE))) {
|
||||
pgste_set_unlock(ptep, pgste);
|
||||
return -EAGAIN;
|
||||
}
|
||||
/* Change access rights and set pgste bit */
|
||||
nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
|
||||
if (prot == PROT_NONE && !pte_i) {
|
||||
ptep_flush_direct(mm, addr, ptep, nodat);
|
||||
pgste = pgste_update_all(entry, pgste, mm);
|
||||
entry = set_pte_bit(entry, __pgprot(_PAGE_INVALID));
|
||||
}
|
||||
if (prot == PROT_READ && !pte_p) {
|
||||
ptep_flush_direct(mm, addr, ptep, nodat);
|
||||
entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID));
|
||||
entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT));
|
||||
}
|
||||
pgste = set_pgste_bit(pgste, bit);
|
||||
pgste = pgste_set_pte(ptep, pgste, entry);
|
||||
pgste_set_unlock(ptep, pgste);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
|
||||
pte_t *sptep, pte_t *tptep, pte_t pte)
|
||||
{
|
||||
pgste_t spgste, tpgste;
|
||||
pte_t spte, tpte;
|
||||
int rc = -EAGAIN;
|
||||
|
||||
if (!(pte_val(*tptep) & _PAGE_INVALID))
|
||||
return 0; /* already shadowed */
|
||||
spgste = pgste_get_lock(sptep);
|
||||
spte = *sptep;
|
||||
if (!(pte_val(spte) & _PAGE_INVALID) &&
|
||||
!((pte_val(spte) & _PAGE_PROTECT) &&
|
||||
!(pte_val(pte) & _PAGE_PROTECT))) {
|
||||
spgste = set_pgste_bit(spgste, PGSTE_VSIE_BIT);
|
||||
tpgste = pgste_get_lock(tptep);
|
||||
tpte = __pte((pte_val(spte) & PAGE_MASK) |
|
||||
(pte_val(pte) & _PAGE_PROTECT));
|
||||
/* don't touch the storage key - it belongs to parent pgste */
|
||||
tpgste = pgste_set_pte(tptep, tpgste, tpte);
|
||||
pgste_set_unlock(tptep, tpgste);
|
||||
rc = 1;
|
||||
}
|
||||
pgste_set_unlock(sptep, spgste);
|
||||
return rc;
|
||||
}
|
||||
|
||||
void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
|
||||
{
|
||||
pgste_t pgste;
|
||||
int nodat;
|
||||
|
||||
pgste = pgste_get_lock(ptep);
|
||||
/* notifier is called by the caller */
|
||||
nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
|
||||
ptep_flush_direct(mm, saddr, ptep, nodat);
|
||||
/* don't touch the storage key - it belongs to parent pgste */
|
||||
pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
|
||||
pgste_set_unlock(ptep, pgste);
|
||||
}
|
||||
|
||||
static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry)
|
||||
{
|
||||
if (softleaf_is_swap(entry))
|
||||
dec_mm_counter(mm, MM_SWAPENTS);
|
||||
else if (softleaf_is_migration(entry)) {
|
||||
struct folio *folio = softleaf_to_folio(entry);
|
||||
|
||||
dec_mm_counter(mm, mm_counter(folio));
|
||||
}
|
||||
free_swap_and_cache(entry);
|
||||
}
|
||||
|
||||
void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, int reset)
|
||||
{
|
||||
unsigned long pgstev;
|
||||
pgste_t pgste;
|
||||
pte_t pte;
|
||||
|
||||
/* Zap unused and logically-zero pages */
|
||||
preempt_disable();
|
||||
pgste = pgste_get_lock(ptep);
|
||||
pgstev = pgste_val(pgste);
|
||||
pte = *ptep;
|
||||
if (!reset && pte_swap(pte) &&
|
||||
((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
|
||||
(pgstev & _PGSTE_GPS_ZERO))) {
|
||||
ptep_zap_softleaf_entry(mm, softleaf_from_pte(pte));
|
||||
pte_clear(mm, addr, ptep);
|
||||
}
|
||||
if (reset)
|
||||
pgste = clear_pgste_bit(pgste, _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
|
||||
pgste_set_unlock(ptep, pgste);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
|
||||
{
|
||||
unsigned long ptev;
|
||||
pgste_t pgste;
|
||||
|
||||
/* Clear storage key ACC and F, but set R/C */
|
||||
preempt_disable();
|
||||
pgste = pgste_get_lock(ptep);
|
||||
pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT);
|
||||
pgste = set_pgste_bit(pgste, PGSTE_GR_BIT | PGSTE_GC_BIT);
|
||||
ptev = pte_val(*ptep);
|
||||
if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
|
||||
page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0);
|
||||
pgste_set_unlock(ptep, pgste);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/*
|
||||
* Test and reset if a guest page is dirty
|
||||
*/
|
||||
bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep)
|
||||
{
|
||||
pgste_t pgste;
|
||||
pte_t pte;
|
||||
bool dirty;
|
||||
int nodat;
|
||||
|
||||
pgste = pgste_get_lock(ptep);
|
||||
dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
|
||||
pgste = clear_pgste_bit(pgste, PGSTE_UC_BIT);
|
||||
pte = *ptep;
|
||||
if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
|
||||
pgste = pgste_pte_notify(mm, addr, ptep, pgste);
|
||||
nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
|
||||
ptep_ipte_global(mm, addr, ptep, nodat);
|
||||
if (machine_has_esop() || !(pte_val(pte) & _PAGE_WRITE))
|
||||
pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT));
|
||||
else
|
||||
pte = set_pte_bit(pte, __pgprot(_PAGE_INVALID));
|
||||
set_pte(ptep, pte);
|
||||
}
|
||||
pgste_set_unlock(ptep, pgste);
|
||||
return dirty;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc);
|
||||
|
||||
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
||||
unsigned char key, bool nq)
|
||||
{
|
||||
unsigned long keyul, paddr;
|
||||
spinlock_t *ptl;
|
||||
pgste_t old, new;
|
||||
pmd_t *pmdp;
|
||||
pte_t *ptep;
|
||||
|
||||
/*
|
||||
* If we don't have a PTE table and if there is no huge page mapped,
|
||||
* we can ignore attempts to set the key to 0, because it already is 0.
|
||||
*/
|
||||
switch (pmd_lookup(mm, addr, &pmdp)) {
|
||||
case -ENOENT:
|
||||
return key ? -EFAULT : 0;
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
return -EFAULT;
|
||||
}
|
||||
again:
|
||||
ptl = pmd_lock(mm, pmdp);
|
||||
if (!pmd_present(*pmdp)) {
|
||||
spin_unlock(ptl);
|
||||
return key ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
if (pmd_leaf(*pmdp)) {
|
||||
paddr = pmd_val(*pmdp) & HPAGE_MASK;
|
||||
paddr |= addr & ~HPAGE_MASK;
|
||||
/*
|
||||
* Huge pmds need quiescing operations, they are
|
||||
* always mapped.
|
||||
*/
|
||||
page_set_storage_key(paddr, key, 1);
|
||||
spin_unlock(ptl);
|
||||
return 0;
|
||||
}
|
||||
spin_unlock(ptl);
|
||||
|
||||
ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
|
||||
if (!ptep)
|
||||
goto again;
|
||||
new = old = pgste_get_lock(ptep);
|
||||
new = clear_pgste_bit(new, PGSTE_GR_BIT | PGSTE_GC_BIT |
|
||||
PGSTE_ACC_BITS | PGSTE_FP_BIT);
|
||||
keyul = (unsigned long) key;
|
||||
new = set_pgste_bit(new, (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48);
|
||||
new = set_pgste_bit(new, (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56);
|
||||
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
|
||||
unsigned long bits, skey;
|
||||
|
||||
paddr = pte_val(*ptep) & PAGE_MASK;
|
||||
skey = (unsigned long) page_get_storage_key(paddr);
|
||||
bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
|
||||
skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
|
||||
/* Set storage key ACC and FP */
|
||||
page_set_storage_key(paddr, skey, !nq);
|
||||
/* Merge host changed & referenced into pgste */
|
||||
new = set_pgste_bit(new, bits << 52);
|
||||
}
|
||||
/* changing the guest storage key is considered a change of the page */
|
||||
if ((pgste_val(new) ^ pgste_val(old)) &
|
||||
(PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
|
||||
new = set_pgste_bit(new, PGSTE_UC_BIT);
|
||||
|
||||
pgste_set_unlock(ptep, new);
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(set_guest_storage_key);
|
||||
|
||||
/*
|
||||
* Conditionally set a guest storage key (handling csske).
|
||||
* oldkey will be updated when either mr or mc is set and a pointer is given.
|
||||
*
|
||||
* Returns 0 if a guests storage key update wasn't necessary, 1 if the guest
|
||||
* storage key was updated and -EFAULT on access errors.
|
||||
*/
|
||||
int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
||||
unsigned char key, unsigned char *oldkey,
|
||||
bool nq, bool mr, bool mc)
|
||||
{
|
||||
unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT;
|
||||
int rc;
|
||||
|
||||
/* we can drop the pgste lock between getting and setting the key */
|
||||
if (mr | mc) {
|
||||
rc = get_guest_storage_key(current->mm, addr, &tmp);
|
||||
if (rc)
|
||||
return rc;
|
||||
if (oldkey)
|
||||
*oldkey = tmp;
|
||||
if (!mr)
|
||||
mask |= _PAGE_REFERENCED;
|
||||
if (!mc)
|
||||
mask |= _PAGE_CHANGED;
|
||||
if (!((tmp ^ key) & mask))
|
||||
return 0;
|
||||
}
|
||||
rc = set_guest_storage_key(current->mm, addr, key, nq);
|
||||
return rc < 0 ? rc : 1;
|
||||
}
|
||||
EXPORT_SYMBOL(cond_set_guest_storage_key);
|
||||
|
||||
/*
|
||||
* Reset a guest reference bit (rrbe), returning the reference and changed bit.
|
||||
*
|
||||
* Returns < 0 in case of error, otherwise the cc to be reported to the guest.
|
||||
*/
|
||||
int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
|
||||
{
|
||||
spinlock_t *ptl;
|
||||
unsigned long paddr;
|
||||
pgste_t old, new;
|
||||
pmd_t *pmdp;
|
||||
pte_t *ptep;
|
||||
int cc = 0;
|
||||
|
||||
/*
|
||||
* If we don't have a PTE table and if there is no huge page mapped,
|
||||
* the storage key is 0 and there is nothing for us to do.
|
||||
*/
|
||||
switch (pmd_lookup(mm, addr, &pmdp)) {
|
||||
case -ENOENT:
|
||||
return 0;
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
return -EFAULT;
|
||||
}
|
||||
again:
|
||||
ptl = pmd_lock(mm, pmdp);
|
||||
if (!pmd_present(*pmdp)) {
|
||||
spin_unlock(ptl);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (pmd_leaf(*pmdp)) {
|
||||
paddr = pmd_val(*pmdp) & HPAGE_MASK;
|
||||
paddr |= addr & ~HPAGE_MASK;
|
||||
cc = page_reset_referenced(paddr);
|
||||
spin_unlock(ptl);
|
||||
return cc;
|
||||
}
|
||||
spin_unlock(ptl);
|
||||
|
||||
ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
|
||||
if (!ptep)
|
||||
goto again;
|
||||
new = old = pgste_get_lock(ptep);
|
||||
/* Reset guest reference bit only */
|
||||
new = clear_pgste_bit(new, PGSTE_GR_BIT);
|
||||
|
||||
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
|
||||
paddr = pte_val(*ptep) & PAGE_MASK;
|
||||
cc = page_reset_referenced(paddr);
|
||||
/* Merge real referenced bit into host-set */
|
||||
new = set_pgste_bit(new, ((unsigned long)cc << 53) & PGSTE_HR_BIT);
|
||||
}
|
||||
/* Reflect guest's logical view, not physical */
|
||||
cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;
|
||||
/* Changing the guest storage key is considered a change of the page */
|
||||
if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)
|
||||
new = set_pgste_bit(new, PGSTE_UC_BIT);
|
||||
|
||||
pgste_set_unlock(ptep, new);
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
return cc;
|
||||
}
|
||||
EXPORT_SYMBOL(reset_guest_reference_bit);
|
||||
|
||||
int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
|
||||
unsigned char *key)
|
||||
{
|
||||
unsigned long paddr;
|
||||
spinlock_t *ptl;
|
||||
pgste_t pgste;
|
||||
pmd_t *pmdp;
|
||||
pte_t *ptep;
|
||||
|
||||
/*
|
||||
* If we don't have a PTE table and if there is no huge page mapped,
|
||||
* the storage key is 0.
|
||||
*/
|
||||
*key = 0;
|
||||
|
||||
switch (pmd_lookup(mm, addr, &pmdp)) {
|
||||
case -ENOENT:
|
||||
return 0;
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
return -EFAULT;
|
||||
}
|
||||
again:
|
||||
ptl = pmd_lock(mm, pmdp);
|
||||
if (!pmd_present(*pmdp)) {
|
||||
spin_unlock(ptl);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (pmd_leaf(*pmdp)) {
|
||||
paddr = pmd_val(*pmdp) & HPAGE_MASK;
|
||||
paddr |= addr & ~HPAGE_MASK;
|
||||
*key = page_get_storage_key(paddr);
|
||||
spin_unlock(ptl);
|
||||
return 0;
|
||||
}
|
||||
spin_unlock(ptl);
|
||||
|
||||
ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
|
||||
if (!ptep)
|
||||
goto again;
|
||||
pgste = pgste_get_lock(ptep);
|
||||
*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
|
||||
paddr = pte_val(*ptep) & PAGE_MASK;
|
||||
if (!(pte_val(*ptep) & _PAGE_INVALID))
|
||||
*key = page_get_storage_key(paddr);
|
||||
/* Reflect guest's logical view, not physical */
|
||||
*key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
|
||||
pgste_set_unlock(ptep, pgste);
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(get_guest_storage_key);
|
||||
|
||||
/**
 * pgste_perform_essa - perform ESSA actions on the PGSTE.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @orc: the specific action to perform, see the ESSA_SET_* macros.
 * @oldpte: the PTE will be saved there if the pointer is not NULL.
 * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL.
 *
 * Return: 1 if the page is to be added to the CBRL, otherwise 0,
 * or < 0 in case of error. -EINVAL is returned for invalid values
 * of orc, -EFAULT for invalid addresses.
 */
int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
			unsigned long *oldpte, unsigned long *oldpgste)
{
	struct vm_area_struct *vma;
	unsigned long pgstev;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;
	int res = 0;

	WARN_ON_ONCE(orc > ESSA_MAX);
	if (unlikely(orc > ESSA_MAX))
		return -EINVAL;

	/* Huge pages have no PGSTEs, so ESSA cannot be emulated for them. */
	vma = vma_lookup(mm, hva);
	if (!vma || is_vm_hugetlb_page(vma))
		return -EFAULT;
	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	/* Report the old PTE/PGSTE before any modification, if requested. */
	if (oldpte)
		*oldpte = pte_val(*ptep);
	if (oldpgste)
		*oldpgste = pgstev;

	switch (orc) {
	case ESSA_GET_STATE:
		/* Read-only query; the old values reported above suffice. */
		break;
	case ESSA_SET_STABLE:
		pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
		pgstev |= _PGSTE_GPS_USAGE_STABLE;
		break;
	case ESSA_SET_UNUSED:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_UNUSED;
		/* An invalid (paged-out) unused page can be discarded. */
		if (pte_val(*ptep) & _PAGE_INVALID)
			res = 1;
		break;
	case ESSA_SET_VOLATILE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
		/* An invalid (paged-out) volatile page can be discarded. */
		if (pte_val(*ptep) & _PAGE_INVALID)
			res = 1;
		break;
	case ESSA_SET_POT_VOLATILE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			/* Page is resident: only potentially volatile. */
			pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE;
			break;
		}
		if (pgstev & _PGSTE_GPS_ZERO) {
			/* Already logically zero: fully volatile. */
			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
			break;
		}
		if (!(pgstev & PGSTE_GC_BIT)) {
			/* Invalid and not changed by the guest: discardable. */
			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
			res = 1;
			break;
		}
		break;
	case ESSA_SET_STABLE_RESIDENT:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_STABLE;
		/*
		 * Since the resident state can go away any time after this
		 * call, we will not make this page resident. We can revisit
		 * this decision if a guest will ever start using this.
		 */
		break;
	case ESSA_SET_STABLE_IF_RESIDENT:
		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			pgstev &= ~_PGSTE_GPS_USAGE_MASK;
			pgstev |= _PGSTE_GPS_USAGE_STABLE;
		}
		break;
	case ESSA_SET_STABLE_NODAT:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT;
		break;
	default:
		/* we should never get here! */
		break;
	}
	/* If we are discarding a page, set it to logical zero */
	if (res)
		pgstev |= _PGSTE_GPS_ZERO;

	pgste = __pgste(pgstev);
	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	return res;
}
EXPORT_SYMBOL(pgste_perform_essa);
|
||||
|
||||
/**
|
||||
* set_pgste_bits - set specific PGSTE bits.
|
||||
* @mm: the memory context. It must have PGSTEs, no check is performed here!
|
||||
* @hva: the host virtual address of the page whose PGSTE is to be processed
|
||||
* @bits: a bitmask representing the bits that will be touched
|
||||
* @value: the values of the bits to be written. Only the bits in the mask
|
||||
* will be written.
|
||||
*
|
||||
* Return: 0 on success, < 0 in case of error.
|
||||
*/
|
||||
int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
|
||||
unsigned long bits, unsigned long value)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
spinlock_t *ptl;
|
||||
pgste_t new;
|
||||
pte_t *ptep;
|
||||
|
||||
vma = vma_lookup(mm, hva);
|
||||
if (!vma || is_vm_hugetlb_page(vma))
|
||||
return -EFAULT;
|
||||
ptep = get_locked_pte(mm, hva, &ptl);
|
||||
if (unlikely(!ptep))
|
||||
return -EFAULT;
|
||||
new = pgste_get_lock(ptep);
|
||||
|
||||
new = clear_pgste_bit(new, bits);
|
||||
new = set_pgste_bit(new, value & bits);
|
||||
|
||||
pgste_set_unlock(ptep, new);
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(set_pgste_bits);
|
||||
|
||||
/**
|
||||
* get_pgste - get the current PGSTE for the given address.
|
||||
* @mm: the memory context. It must have PGSTEs, no check is performed here!
|
||||
* @hva: the host virtual address of the page whose PGSTE is to be processed
|
||||
* @pgstep: will be written with the current PGSTE for the given address.
|
||||
*
|
||||
* Return: 0 on success, < 0 in case of error.
|
||||
*/
|
||||
int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
spinlock_t *ptl;
|
||||
pte_t *ptep;
|
||||
|
||||
vma = vma_lookup(mm, hva);
|
||||
if (!vma || is_vm_hugetlb_page(vma))
|
||||
return -EFAULT;
|
||||
ptep = get_locked_pte(mm, hva, &ptl);
|
||||
if (unlikely(!ptep))
|
||||
return -EFAULT;
|
||||
*pgstep = pgste_val(pgste_get(ptep));
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(get_pgste);
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -645,7 +645,9 @@ static inline unsigned long *kvm_second_dirty_bitmap(struct kvm_memory_slot *mem
|
|||
|
||||
struct kvm_s390_adapter_int {
|
||||
u64 ind_addr;
|
||||
u64 ind_gaddr;
|
||||
u64 summary_addr;
|
||||
u64 summary_gaddr;
|
||||
u64 ind_offset;
|
||||
u32 summary_offset;
|
||||
u32 adapter_id;
|
||||
|
|
|
|||
|
|
@ -984,6 +984,7 @@ struct kvm_enable_cap {
|
|||
#define KVM_CAP_GUEST_MEMFD_FLAGS 244
|
||||
#define KVM_CAP_ARM_SEA_TO_USER 245
|
||||
#define KVM_CAP_S390_USER_OPEREXEC 246
|
||||
#define KVM_CAP_S390_KEYOP 247
|
||||
|
||||
struct kvm_irq_routing_irqchip {
|
||||
__u32 irqchip;
|
||||
|
|
@ -1229,6 +1230,16 @@ struct kvm_vfio_spapr_tce {
|
|||
__s32 tablefd;
|
||||
};
|
||||
|
||||
/*
 * Storage key operations for the KVM_S390_KEYOP ioctl.
 * ISKE reads a key, RRBE reads and resets the reference bit, SSKE sets a
 * new key; in all cases the previous key (or the relevant part thereof)
 * is returned in @key.
 */
#define KVM_S390_KEYOP_ISKE 0x01
#define KVM_S390_KEYOP_RRBE 0x02
#define KVM_S390_KEYOP_SSKE 0x03
struct kvm_s390_keyop {
	__u64 guest_addr;	/* guest address of the page to operate on */
	__u8 key;		/* in: key to set (SSKE); out: previous key */
	__u8 operation;		/* one of KVM_S390_KEYOP_* */
	__u8 pad[6];		/* must be zero */
};
|
||||
|
||||
/*
|
||||
* KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
|
||||
* a vcpu fd.
|
||||
|
|
@ -1248,6 +1259,7 @@ struct kvm_vfio_spapr_tce {
|
|||
#define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping)
|
||||
#define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping)
|
||||
#define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long)
|
||||
#define KVM_S390_KEYOP _IOWR(KVMIO, 0x53, struct kvm_s390_keyop)
|
||||
|
||||
/* Device model IOC */
|
||||
#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60)
|
||||
|
|
|
|||
|
|
@ -343,15 +343,6 @@ int hugepage_madvise(struct vm_area_struct *vma,
|
|||
{
|
||||
switch (advice) {
|
||||
case MADV_HUGEPAGE:
|
||||
#ifdef CONFIG_S390
|
||||
/*
|
||||
* qemu blindly sets MADV_HUGEPAGE on all allocations, but s390
|
||||
* can't handle this properly after s390_enable_sie, so we simply
|
||||
* ignore the madvise to prevent qemu from causing a SIGSEGV.
|
||||
*/
|
||||
if (mm_has_pgste(vma->vm_mm))
|
||||
return 0;
|
||||
#endif
|
||||
*vm_flags &= ~VM_NOHUGEPAGE;
|
||||
*vm_flags |= VM_HUGEPAGE;
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -203,6 +203,7 @@ TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test
|
|||
TEST_GEN_PROGS_s390 += s390/shared_zeropage_test
|
||||
TEST_GEN_PROGS_s390 += s390/ucontrol_test
|
||||
TEST_GEN_PROGS_s390 += s390/user_operexec
|
||||
TEST_GEN_PROGS_s390 += s390/keyop
|
||||
TEST_GEN_PROGS_s390 += rseq_test
|
||||
|
||||
TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON)
|
||||
|
|
|
|||
299
tools/testing/selftests/kvm/s390/keyop.c
Normal file
299
tools/testing/selftests/kvm/s390/keyop.c
Normal file
|
|
@ -0,0 +1,299 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Test for s390x KVM_S390_KEYOP
|
||||
*
|
||||
* Copyright IBM Corp. 2026
|
||||
*
|
||||
* Authors:
|
||||
* Claudio Imbrenda <imbrenda@linux.ibm.com>
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include <linux/bits.h>
|
||||
|
||||
#include "test_util.h"
|
||||
#include "kvm_util.h"
|
||||
#include "kselftest.h"
|
||||
#include "processor.h"
|
||||
|
||||
/* Size of the test buffer and of the whole guest, in pages. */
#define BUF_PAGES 128UL
#define GUEST_PAGES 256UL

/* The test buffer occupies the top BUF_PAGES pages of guest memory. */
#define BUF_START_GFN (GUEST_PAGES - BUF_PAGES)
#define BUF_START_ADDR (BUF_START_GFN << PAGE_SHIFT)

/* Storage key layout: access-control bits, fetch, reference, change. */
#define KEY_BITS_ACC 0xf0
#define KEY_BIT_F 0x08
#define KEY_BIT_R 0x04
#define KEY_BIT_C 0x02

#define KEY_BITS_RC (KEY_BIT_R | KEY_BIT_C)
#define KEY_BITS_ALL (KEY_BITS_ACC | KEY_BIT_F | KEY_BITS_RC)

/* Scratch key arrays, one byte per page of the test buffer. */
static unsigned char tmp[BUF_PAGES];
static unsigned char old[BUF_PAGES];
static unsigned char expected[BUF_PAGES];
|
||||
|
||||
static int _get_skeys(struct kvm_vcpu *vcpu, unsigned char skeys[])
|
||||
{
|
||||
struct kvm_s390_skeys skeys_ioctl = {
|
||||
.start_gfn = BUF_START_GFN,
|
||||
.count = BUF_PAGES,
|
||||
.skeydata_addr = (unsigned long)skeys,
|
||||
};
|
||||
|
||||
return __vm_ioctl(vcpu->vm, KVM_S390_GET_SKEYS, &skeys_ioctl);
|
||||
}
|
||||
|
||||
/* Like _get_skeys(), but assert that the ioctl succeeds. */
static void get_skeys(struct kvm_vcpu *vcpu, unsigned char skeys[])
{
	int rc = _get_skeys(vcpu, skeys);

	TEST_ASSERT(!rc, "Failed to get storage keys, r=%d", rc);
}
|
||||
|
||||
static void set_skeys(struct kvm_vcpu *vcpu, unsigned char skeys[])
|
||||
{
|
||||
struct kvm_s390_skeys skeys_ioctl = {
|
||||
.start_gfn = BUF_START_GFN,
|
||||
.count = BUF_PAGES,
|
||||
.skeydata_addr = (unsigned long)skeys,
|
||||
};
|
||||
int r;
|
||||
|
||||
r = __vm_ioctl(vcpu->vm, KVM_S390_SET_SKEYS, &skeys_ioctl);
|
||||
TEST_ASSERT(!r, "Failed to set storage keys, r=%d", r);
|
||||
}
|
||||
|
||||
/*
 * Perform storage key operation @op on page @page_idx of the test buffer,
 * passing @skey as the input key.  Returns the key value reported back by
 * the ioctl (the previous key for ISKE/SSKE, the old R and C bits for
 * RRBE).  The lowest bit of the reported key is expected to always read
 * back as zero, even if it was set in the requested key.
 */
static int do_keyop(struct kvm_vcpu *vcpu, int op, unsigned long page_idx, unsigned char skey)
{
	struct kvm_s390_keyop keyop = {
		.guest_addr = BUF_START_ADDR + page_idx * PAGE_SIZE,
		.key = skey,
		.operation = op,
	};
	int r;

	r = __vm_ioctl(vcpu->vm, KVM_S390_KEYOP, &keyop);
	TEST_ASSERT(!r, "Failed to perform keyop, r=%d", r);
	TEST_ASSERT((keyop.key & 1) == 0,
		    "Last bit of key is 1, should be 0! page %lu, new key=%#x, old key=%#x",
		    page_idx, skey, keyop.key);

	return keyop.key;
}
|
||||
|
||||
static void fault_in_buffer(struct kvm_vcpu *vcpu, int where, int cur_loc)
|
||||
{
|
||||
unsigned long i;
|
||||
int r;
|
||||
|
||||
if (where != cur_loc)
|
||||
return;
|
||||
|
||||
for (i = 0; i < BUF_PAGES; i++) {
|
||||
r = ioctl(vcpu->fd, KVM_S390_VCPU_FAULT, BUF_START_ADDR + i * PAGE_SIZE);
|
||||
TEST_ASSERT(!r, "Faulting in buffer page %lu, r=%d", i, r);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void set_pattern(unsigned char skeys[])
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BUF_PAGES; i++)
|
||||
skeys[i] = i << 1;
|
||||
}
|
||||
|
||||
static void dump_sk(const unsigned char skeys[], const char *descr)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
fprintf(stderr, "# %s:\n", descr);
|
||||
for (i = 0; i < BUF_PAGES; i += 32) {
|
||||
fprintf(stderr, "# %3d: ", i);
|
||||
for (j = 0; j < 32; j++)
|
||||
fprintf(stderr, "%02x ", skeys[i + j]);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
static inline void compare(const unsigned char what[], const unsigned char expected[],
|
||||
const char *descr, int fault_in_loc)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BUF_PAGES; i++) {
|
||||
if (expected[i] != what[i]) {
|
||||
dump_sk(expected, "Expected");
|
||||
dump_sk(what, "Got");
|
||||
}
|
||||
TEST_ASSERT(expected[i] == what[i],
|
||||
"%s! fault-in location %d, page %d, expected %#x, got %#x",
|
||||
descr, fault_in_loc, i, expected[i], what[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void clear_all(void)
|
||||
{
|
||||
memset(tmp, 0, BUF_PAGES);
|
||||
memset(old, 0, BUF_PAGES);
|
||||
memset(expected, 0, BUF_PAGES);
|
||||
}
|
||||
|
||||
/*
 * Verify that KVM_S390_SET_SKEYS / KVM_S390_GET_SKEYS round-trip: first
 * with all keys zero, then with a per-page pattern.  @fault_in selects
 * which step (1-4) faults the buffer in beforehand; 0 means never.
 */
static void test_init(struct kvm_vcpu *vcpu, int fault_in)
{
	/* Set all storage keys to zero ("expected" is zeroed by clear_all()) */
	fault_in_buffer(vcpu, fault_in, 1);
	set_skeys(vcpu, expected);

	fault_in_buffer(vcpu, fault_in, 2);
	get_skeys(vcpu, tmp);
	compare(tmp, expected, "Setting keys not zero", fault_in);

	/* Set storage keys to a sequential pattern */
	fault_in_buffer(vcpu, fault_in, 3);
	set_pattern(expected);
	set_skeys(vcpu, expected);

	fault_in_buffer(vcpu, fault_in, 4);
	get_skeys(vcpu, tmp);
	compare(tmp, expected, "Setting storage keys failed", fault_in);
}
|
||||
|
||||
/*
 * Verify KVM_S390_KEYOP_RRBE: it must return the old R and C bits of each
 * page and clear only the reference bit, leaving the rest of the key
 * untouched.
 */
static void test_rrbe(struct kvm_vcpu *vcpu, int fault_in)
{
	unsigned char k;
	int i;

	/* Set storage keys to a sequential pattern */
	fault_in_buffer(vcpu, fault_in, 1);
	set_pattern(expected);
	set_skeys(vcpu, expected);

	/* Call the RRBE KEYOP ioctl on each page and verify the result */
	fault_in_buffer(vcpu, fault_in, 2);
	for (i = 0; i < BUF_PAGES; i++) {
		/* The input key (0xff) must be ignored by RRBE. */
		k = do_keyop(vcpu, KVM_S390_KEYOP_RRBE, i, 0xff);
		TEST_ASSERT((expected[i] & KEY_BITS_RC) == k,
			    "Old R or C value mismatch! expected: %#x, got %#x",
			    expected[i] & KEY_BITS_RC, k);
		if (i == BUF_PAGES / 2)
			fault_in_buffer(vcpu, fault_in, 3);
	}

	/* RRBE resets the reference bit, so expect it cleared everywhere. */
	for (i = 0; i < BUF_PAGES; i++)
		expected[i] &= ~KEY_BIT_R;

	/* Verify that only the R bit has been cleared */
	fault_in_buffer(vcpu, fault_in, 4);
	get_skeys(vcpu, tmp);
	compare(tmp, expected, "New value mismatch", fault_in);
}
|
||||
|
||||
/*
 * Verify KVM_S390_KEYOP_ISKE: it must return the current storage key of
 * each page without modifying it.
 */
static void test_iske(struct kvm_vcpu *vcpu, int fault_in)
{
	int i;

	/* Set storage keys to a sequential pattern */
	fault_in_buffer(vcpu, fault_in, 1);
	set_pattern(expected);
	set_skeys(vcpu, expected);

	/* Call the ISKE KEYOP ioctl on each page and verify the result */
	fault_in_buffer(vcpu, fault_in, 2);
	for (i = 0; i < BUF_PAGES; i++) {
		/* The input key (0xff) must be ignored by ISKE. */
		tmp[i] = do_keyop(vcpu, KVM_S390_KEYOP_ISKE, i, 0xff);
		if (i == BUF_PAGES / 2)
			fault_in_buffer(vcpu, fault_in, 3);
	}
	compare(tmp, expected, "Old value mismatch", fault_in);

	/* Check storage keys have not changed */
	fault_in_buffer(vcpu, fault_in, 4);
	get_skeys(vcpu, tmp);
	compare(tmp, expected, "Storage keys values changed", fault_in);
}
|
||||
|
||||
/*
 * Verify KVM_S390_KEYOP_SSKE: it must return the old storage key of each
 * page and install the new one; the lowest bit of the requested key is
 * deliberately set and must not be stored.
 */
static void test_sske(struct kvm_vcpu *vcpu, int fault_in)
{
	int i;

	/* Set storage keys to a sequential pattern */
	fault_in_buffer(vcpu, fault_in, 1);
	set_pattern(tmp);
	set_skeys(vcpu, tmp);

	/* Call the SSKE KEYOP ioctl on each page and verify the result */
	fault_in_buffer(vcpu, fault_in, 2);
	for (i = 0; i < BUF_PAGES; i++) {
		/* Set the new storage keys to be the bit-inversion of the previous ones */
		expected[i] = ~tmp[i] & KEY_BITS_ALL;
		/* The extra low bit (| 1) must be ignored by SSKE. */
		old[i] = do_keyop(vcpu, KVM_S390_KEYOP_SSKE, i, expected[i] | 1);
		if (i == BUF_PAGES / 2)
			fault_in_buffer(vcpu, fault_in, 3);
	}
	compare(old, tmp, "Old value mismatch", fault_in);

	/* Verify that the storage keys have been set correctly */
	fault_in_buffer(vcpu, fault_in, 4);
	get_skeys(vcpu, tmp);
	compare(tmp, expected, "New value mismatch", fault_in);
}
|
||||
|
||||
/*
 * Test plan: each test is run once for every fault-in location in
 * [0, n_fault_in_locations); location 0 means the buffer is never
 * explicitly faulted in.
 */
static struct testdef {
	const char *name;	/* human-readable test name for TAP output */
	void (*test)(struct kvm_vcpu *vcpu, int fault_in_location);
	int n_fault_in_locations;	/* number of fault-in variants to run */
} testplan[] = {
	{ "Initialization", test_init, 5 },
	{ "RRBE", test_rrbe, 5 },
	{ "ISKE", test_iske, 5 },
	{ "SSKE", test_sske, 5 },
};
|
||||
|
||||
static void run_test(void (*the_test)(struct kvm_vcpu *, int), int fault_in_location)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct kvm_vm *vm;
|
||||
int r;
|
||||
|
||||
vm = vm_create_barebones();
|
||||
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, GUEST_PAGES, 0);
|
||||
vcpu = __vm_vcpu_add(vm, 0);
|
||||
|
||||
r = _get_skeys(vcpu, tmp);
|
||||
TEST_ASSERT(r == KVM_S390_GET_SKEYS_NONE,
|
||||
"Storage keys are not disabled initially, r=%d", r);
|
||||
|
||||
clear_all();
|
||||
|
||||
the_test(vcpu, fault_in_location);
|
||||
|
||||
kvm_vm_free(vm);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, f;
|
||||
|
||||
TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_KEYOP));
|
||||
TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL));
|
||||
|
||||
ksft_print_header();
|
||||
for (i = 0, f = 0; i < ARRAY_SIZE(testplan); i++)
|
||||
f += testplan[i].n_fault_in_locations;
|
||||
ksft_set_plan(f);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(testplan); i++) {
|
||||
for (f = 0; f < testplan[i].n_fault_in_locations; f++) {
|
||||
run_test(testplan[i].test, f);
|
||||
ksft_test_result_pass("%s (fault-in location %d)\n", testplan[i].name, f);
|
||||
}
|
||||
}
|
||||
|
||||
ksft_finished(); /* Print results and exit() accordingly */
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue