hyperv-next for v7.0

-----BEGIN PGP SIGNATURE-----
 
 iQFHBAABCgAxFiEEIbPD0id6easf0xsudhRwX5BBoF4FAmmWuQwTHHdlaS5saXVA
 a2VybmVsLm9yZwAKCRB2FHBfkEGgXnnHB/41Jji+y8FHe2SqpQhUOqHb6NDEr3GX
 YpAybhz2IsBHVhbCQn789UiIcSr0UDR7wnVLAmXe+5eY/jRwNggIO3tFqLYn92pK
 KSTNafgNbLxh3iKBxRsUy0b3JutjD2LytkpFj2KVbBsZfmRxCZmKIV/4V18rV+fA
 uemvoqLwU7emEWkhZ24suHMHPVpv6xKs9O6gOrQ4+zXR0g//eMLDqb17uj8h+8sM
 ZsPsMYeuOihXlvGeBRjbnWYjA1ODWGDvwR9VT+VU4+HWht/KSr15EGeXZdV2eZUt
 e/8swbqOS94a2ZjOgStzVkcPqAF88t9zZ+gvYElTDzLlHjqbrZdpeDDt
 =A7tT
 -----END PGP SIGNATURE-----

Merge tag 'hyperv-next-signed-20260218' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux

Pull Hyper-V updates from Wei Liu:

 - Debugfs support for MSHV statistics (Nuno Das Neves)

 - Support for the integrated scheduler (Stanislav Kinsburskii)

 - Various fixes for MSHV memory management and hypervisor status
   handling (Stanislav Kinsburskii)

 - Expose more capabilities and flags for MSHV partition management
   (Anatol Belski, Muminul Islam, Magnus Kulke)

 - Miscellaneous fixes to improve code quality and stability (Carlos
   López, Ethan Nelson-Moore, Li RongQing, Michael Kelley, Mukesh
   Rathor, Purna Pavan Chandra Aekkaladevi, Stanislav Kinsburskii, Uros
   Bizjak)

 - PREEMPT_RT fixes for vmbus interrupts (Jan Kiszka)

* tag 'hyperv-next-signed-20260218' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: (34 commits)
  mshv: Handle insufficient root memory hypervisor statuses
  mshv: Handle insufficient contiguous memory hypervisor status
  mshv: Introduce hv_deposit_memory helper functions
  mshv: Introduce hv_result_needs_memory() helper function
  mshv: Add SMT_ENABLED_GUEST partition creation flag
  mshv: Add nested virtualization creation flag
  Drivers: hv: vmbus: Simplify allocation of vmbus_evt
  mshv: expose the scrub partition hypercall
  mshv: Add support for integrated scheduler
  mshv: Use try_cmpxchg() instead of cmpxchg()
  x86/hyperv: Fix error pointer dereference
  x86/hyperv: Reserve 3 interrupt vectors used exclusively by MSHV
  Drivers: hv: vmbus: Use kthread for vmbus interrupts on PREEMPT_RT
  x86/hyperv: Remove ASM_CALL_CONSTRAINT with VMMCALL insn
  x86/hyperv: Use savesegment() instead of inline asm() to save segment registers
  mshv: fix SRCU protection in irqfd resampler ack handler
  mshv: make field names descriptive in a header struct
  x86/hyperv: Update comment in hyperv_cleanup()
  mshv: clear eventfd counter on irqfd shutdown
  x86/hyperv: Use memremap()/memunmap() instead of ioremap_cache()/iounmap()
  ...
This commit is contained in:
Linus Torvalds 2026-02-20 08:48:31 -08:00
commit d31558c077
27 changed files with 1775 additions and 260 deletions

View file

@ -279,7 +279,6 @@ static void hv_notify_prepare_hyp(void)
static noinline __noclone void crash_nmi_callback(struct pt_regs *regs)
{
struct hv_input_disable_hyp_ex *input;
u64 status;
int msecs = 1000, ccpu = smp_processor_id();
if (ccpu == 0) {
@ -313,7 +312,7 @@ static noinline __noclone void crash_nmi_callback(struct pt_regs *regs)
input->rip = trampoline_pa;
input->arg = devirt_arg;
status = hv_do_hypercall(HVCALL_DISABLE_HYP_EX, input, NULL);
(void)hv_do_hypercall(HVCALL_DISABLE_HYP_EX, input, NULL);
hv_panic_timeout_reboot();
}

View file

@ -103,9 +103,9 @@ static int hyperv_init_ghcb(void)
*/
rdmsrq(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa);
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
/* Mask out vTOM bit and map as decrypted */
ghcb_gpa &= ~ms_hyperv.shared_gpa_boundary;
ghcb_va = (void *)ioremap_cache(ghcb_gpa, HV_HYP_PAGE_SIZE);
ghcb_va = memremap(ghcb_gpa, HV_HYP_PAGE_SIZE, MEMREMAP_WB | MEMREMAP_DEC);
if (!ghcb_va)
return -ENOMEM;
@ -277,7 +277,7 @@ static int hv_cpu_die(unsigned int cpu)
if (hv_ghcb_pg) {
ghcb_va = (void **)this_cpu_ptr(hv_ghcb_pg);
if (*ghcb_va)
iounmap(*ghcb_va);
memunmap(*ghcb_va);
*ghcb_va = NULL;
}
@ -558,7 +558,6 @@ void __init hyperv_init(void)
memunmap(src);
hv_remap_tsc_clocksource();
hv_root_crash_init();
hv_sleep_notifiers_register();
} else {
hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
@ -567,6 +566,9 @@ void __init hyperv_init(void)
hv_set_hypercall_pg(hv_hypercall_pg);
if (hv_root_partition()) /* after set hypercall pg */
hv_root_crash_init();
skip_hypercall_pg_init:
/*
* hyperv_init() is called before LAPIC is initialized: see
@ -633,9 +635,13 @@ void hyperv_cleanup(void)
hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
/*
* Reset hypercall page reference before reset the page,
* let hypercall operations fail safely rather than
* panic the kernel for using invalid hypercall page
* Reset hv_hypercall_pg before resetting it in the hypervisor.
* hv_set_hypercall_pg(NULL) is not used because at this point in the
* panic path other CPUs have been stopped, causing static_call_update()
* to hang. So resetting hv_hypercall_pg to cause hypercalls to fail
* cleanly is only operative on 32-bit builds. But this is OK as it is
* just a preventative measure to ease detecting a hypercall being made
* after this point, which shouldn't be happening anyway.
*/
hv_hypercall_pg = NULL;

View file

@ -110,7 +110,7 @@ static void hv_vtl_ap_entry(void)
static int hv_vtl_bringup_vcpu(u32 target_vp_index, int cpu, u64 eip_ignored)
{
u64 status;
u64 status, rsp, rip;
int ret = 0;
struct hv_enable_vp_vtl *input;
unsigned long irq_flags;
@ -123,9 +123,11 @@ static int hv_vtl_bringup_vcpu(u32 target_vp_index, int cpu, u64 eip_ignored)
struct desc_struct *gdt;
struct task_struct *idle = idle_thread_get(cpu);
u64 rsp = (unsigned long)idle->thread.sp;
if (IS_ERR(idle))
return PTR_ERR(idle);
u64 rip = (u64)&hv_vtl_ap_entry;
rsp = (unsigned long)idle->thread.sp;
rip = (u64)&hv_vtl_ap_entry;
native_store_gdt(&gdt_ptr);
store_idt(&idt_ptr);

View file

@ -25,6 +25,7 @@
#include <asm/e820/api.h>
#include <asm/desc.h>
#include <asm/msr.h>
#include <asm/segment.h>
#include <uapi/asm/vmx.h>
#ifdef CONFIG_AMD_MEM_ENCRYPT
@ -315,16 +316,16 @@ int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu)
vmsa->gdtr.base = gdtr.address;
vmsa->gdtr.limit = gdtr.size;
asm volatile("movl %%es, %%eax;" : "=a" (vmsa->es.selector));
savesegment(es, vmsa->es.selector);
hv_populate_vmcb_seg(vmsa->es, vmsa->gdtr.base);
asm volatile("movl %%cs, %%eax;" : "=a" (vmsa->cs.selector));
savesegment(cs, vmsa->cs.selector);
hv_populate_vmcb_seg(vmsa->cs, vmsa->gdtr.base);
asm volatile("movl %%ss, %%eax;" : "=a" (vmsa->ss.selector));
savesegment(ss, vmsa->ss.selector);
hv_populate_vmcb_seg(vmsa->ss, vmsa->gdtr.base);
asm volatile("movl %%ds, %%eax;" : "=a" (vmsa->ds.selector));
savesegment(ds, vmsa->ds.selector);
hv_populate_vmcb_seg(vmsa->ds, vmsa->gdtr.base);
vmsa->efer = native_read_msr(MSR_EFER);
@ -391,7 +392,7 @@ u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2)
register u64 __r8 asm("r8") = param2;
asm volatile("vmmcall"
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
: "=a" (hv_status),
"+c" (control), "+d" (param1), "+r" (__r8)
: : "cc", "memory", "r9", "r10", "r11");

View file

@ -478,6 +478,28 @@ int hv_get_hypervisor_version(union hv_hypervisor_version_info *info)
}
EXPORT_SYMBOL_GPL(hv_get_hypervisor_version);
/*
* Reserved vectors hard coded in the hypervisor. If used outside, the hypervisor
* will either crash or hang or attempt to break into debugger.
*/
static void hv_reserve_irq_vectors(void)
{
#define HYPERV_DBG_FASTFAIL_VECTOR 0x29
#define HYPERV_DBG_ASSERT_VECTOR 0x2C
#define HYPERV_DBG_SERVICE_VECTOR 0x2D
if (cpu_feature_enabled(X86_FEATURE_FRED))
return;
if (test_and_set_bit(HYPERV_DBG_ASSERT_VECTOR, system_vectors) ||
test_and_set_bit(HYPERV_DBG_SERVICE_VECTOR, system_vectors) ||
test_and_set_bit(HYPERV_DBG_FASTFAIL_VECTOR, system_vectors))
BUG();
pr_info("Hyper-V: reserve vectors: %d %d %d\n", HYPERV_DBG_ASSERT_VECTOR,
HYPERV_DBG_SERVICE_VECTOR, HYPERV_DBG_FASTFAIL_VECTOR);
}
static void __init ms_hyperv_init_platform(void)
{
int hv_max_functions_eax, eax;
@ -510,6 +532,9 @@ static void __init ms_hyperv_init_platform(void)
hv_identify_partition_type();
if (hv_root_partition())
hv_reserve_irq_vectors();
if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC))
ms_hyperv.hints |= HV_DEPRECATING_AEOI_RECOMMENDED;

View file

@ -15,6 +15,7 @@ hv_vmbus-$(CONFIG_HYPERV_TESTING) += hv_debugfs.o
hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_utils_transport.o
mshv_root-y := mshv_root_main.o mshv_synic.o mshv_eventfd.o mshv_irq.o \
mshv_root_hv_call.o mshv_portid_table.o mshv_regions.o
mshv_root-$(CONFIG_DEBUG_FS) += mshv_debugfs.o
mshv_vtl-y := mshv_vtl_main.o
# Code that must be built-in

View file

@ -287,11 +287,11 @@ void hv_hyp_synic_enable_regs(unsigned int cpu)
simp.simp_enabled = 1;
if (ms_hyperv.paravisor_present || hv_root_partition()) {
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
/* Mask out vTOM bit and map as decrypted */
u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
~ms_hyperv.shared_gpa_boundary;
hv_cpu->hyp_synic_message_page =
(void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
memremap(base, HV_HYP_PAGE_SIZE, MEMREMAP_WB | MEMREMAP_DEC);
if (!hv_cpu->hyp_synic_message_page)
pr_err("Fail to map synic message page.\n");
} else {
@ -306,11 +306,11 @@ void hv_hyp_synic_enable_regs(unsigned int cpu)
siefp.siefp_enabled = 1;
if (ms_hyperv.paravisor_present || hv_root_partition()) {
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
/* Mask out vTOM bit and map as decrypted */
u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
~ms_hyperv.shared_gpa_boundary;
hv_cpu->hyp_synic_event_page =
(void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
memremap(base, HV_HYP_PAGE_SIZE, MEMREMAP_WB | MEMREMAP_DEC);
if (!hv_cpu->hyp_synic_event_page)
pr_err("Fail to map synic event page.\n");
} else {
@ -429,7 +429,7 @@ void hv_hyp_synic_disable_regs(unsigned int cpu)
simp.simp_enabled = 0;
if (ms_hyperv.paravisor_present || hv_root_partition()) {
if (hv_cpu->hyp_synic_message_page) {
iounmap(hv_cpu->hyp_synic_message_page);
memunmap(hv_cpu->hyp_synic_message_page);
hv_cpu->hyp_synic_message_page = NULL;
}
} else {
@ -443,7 +443,7 @@ void hv_hyp_synic_disable_regs(unsigned int cpu)
if (ms_hyperv.paravisor_present || hv_root_partition()) {
if (hv_cpu->hyp_synic_event_page) {
iounmap(hv_cpu->hyp_synic_event_page);
memunmap(hv_cpu->hyp_synic_event_page);
hv_cpu->hyp_synic_event_page = NULL;
}
} else {

View file

@ -793,6 +793,9 @@ static const struct hv_status_info hv_status_infos[] = {
_STATUS_INFO(HV_STATUS_UNKNOWN_PROPERTY, -EIO),
_STATUS_INFO(HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE, -EIO),
_STATUS_INFO(HV_STATUS_INSUFFICIENT_MEMORY, -ENOMEM),
_STATUS_INFO(HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY, -ENOMEM),
_STATUS_INFO(HV_STATUS_INSUFFICIENT_ROOT_MEMORY, -ENOMEM),
_STATUS_INFO(HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY, -ENOMEM),
_STATUS_INFO(HV_STATUS_INVALID_PARTITION_ID, -EINVAL),
_STATUS_INFO(HV_STATUS_INVALID_VP_INDEX, -EINVAL),
_STATUS_INFO(HV_STATUS_NOT_FOUND, -EIO),

View file

@ -110,6 +110,50 @@ free_buf:
}
EXPORT_SYMBOL_GPL(hv_call_deposit_pages);
int hv_deposit_memory_node(int node, u64 partition_id,
u64 hv_status)
{
u32 num_pages = 1;
switch (hv_result(hv_status)) {
case HV_STATUS_INSUFFICIENT_MEMORY:
break;
case HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY:
num_pages = HV_MAX_CONTIGUOUS_ALLOCATION_PAGES;
break;
case HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY:
num_pages = HV_MAX_CONTIGUOUS_ALLOCATION_PAGES;
fallthrough;
case HV_STATUS_INSUFFICIENT_ROOT_MEMORY:
if (!hv_root_partition()) {
hv_status_err(hv_status, "Unexpected root memory deposit\n");
return -ENOMEM;
}
partition_id = HV_PARTITION_ID_SELF;
break;
default:
hv_status_err(hv_status, "Unexpected!\n");
return -ENOMEM;
}
return hv_call_deposit_pages(node, partition_id, num_pages);
}
EXPORT_SYMBOL_GPL(hv_deposit_memory_node);
bool hv_result_needs_memory(u64 status)
{
switch (hv_result(status)) {
case HV_STATUS_INSUFFICIENT_MEMORY:
case HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY:
case HV_STATUS_INSUFFICIENT_ROOT_MEMORY:
case HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY:
return true;
}
return false;
}
EXPORT_SYMBOL_GPL(hv_result_needs_memory);
int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
{
struct hv_input_add_logical_processor *input;
@ -137,7 +181,7 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
input, output);
local_irq_restore(flags);
if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
if (!hv_result_needs_memory(status)) {
if (!hv_result_success(status)) {
hv_status_err(status, "cpu %u apic ID: %u\n",
lp_index, apic_id);
@ -145,7 +189,8 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
}
break;
}
ret = hv_call_deposit_pages(node, hv_current_partition_id, 1);
ret = hv_deposit_memory_node(node, hv_current_partition_id,
status);
} while (!ret);
return ret;
@ -179,7 +224,7 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL);
local_irq_restore(irq_flags);
if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
if (!hv_result_needs_memory(status)) {
if (!hv_result_success(status)) {
hv_status_err(status, "vcpu: %u, lp: %u\n",
vp_index, flags);
@ -187,7 +232,7 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
}
break;
}
ret = hv_call_deposit_pages(node, partition_id, 1);
ret = hv_deposit_memory_node(node, partition_id, status);
} while (!ret);

View file

@ -370,8 +370,8 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
* CHANNELMSG_UNLOAD_RESPONSE and we don't care about other messages
* on crash.
*/
if (cmpxchg(&msg->header.message_type, old_msg_type,
HVMSG_NONE) != old_msg_type)
if (!try_cmpxchg(&msg->header.message_type,
&old_msg_type, HVMSG_NONE))
return;
/*

726
drivers/hv/mshv_debugfs.c Normal file
View file

@ -0,0 +1,726 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2026, Microsoft Corporation.
*
* The /sys/kernel/debug/mshv directory contents.
* Contains various statistics data, provided by the hypervisor.
*
* Authors: Microsoft Linux virtualization team
*/
#include <linux/debugfs.h>
#include <linux/stringify.h>
#include <asm/mshyperv.h>
#include <linux/slab.h>
#include "mshv.h"
#include "mshv_root.h"
/* Ensure this file is not used elsewhere by accident */
#define MSHV_DEBUGFS_C
#include "mshv_debugfs_counters.c"
#define U32_BUF_SZ 11
#define U64_BUF_SZ 21
/* Only support SELF and PARENT areas */
#define NUM_STATS_AREAS 2
static_assert(HV_STATS_AREA_SELF == 0 && HV_STATS_AREA_PARENT == 1,
"SELF and PARENT areas must be usable as indices into an array of size NUM_STATS_AREAS");
/* HV_HYPERVISOR_COUNTER */
#define HV_HYPERVISOR_COUNTER_LOGICAL_PROCESSORS 1
static struct dentry *mshv_debugfs;
static struct dentry *mshv_debugfs_partition;
static struct dentry *mshv_debugfs_lp;
static struct dentry **parent_vp_stats;
static struct dentry *parent_partition_stats;
static u64 mshv_lps_count;
static struct hv_stats_page **mshv_lps_stats;
/*
 * seq_file show callback for a logical processor's "stats" file.
 * m->private is the hv_stats_page mapped for this LP; print every counter
 * that has a name in hv_lp_counters (gaps in the table are reserved slots
 * and are skipped).
 */
static int lp_stats_show(struct seq_file *m, void *v)
{
	const struct hv_stats_page *stats = m->private;
	int idx;

	for (idx = 0; idx < ARRAY_SIZE(hv_lp_counters); idx++) {
		char *name = hv_lp_counters[idx];

		/* Unnamed slot: nothing meaningful to report */
		if (!name)
			continue;
		seq_printf(m, "%-32s: %llu\n", name, stats->data[idx]);
	}
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(lp_stats);
/*
 * Undo mshv_lp_stats_map() for one logical processor.
 * Best-effort: a failed unmap is only logged, and the cached pointer in
 * mshv_lps_stats[] is cleared regardless so it is never reused.
 */
static void mshv_lp_stats_unmap(u32 lp_index)
{
	union hv_stats_object_identity identity = {
		.lp.lp_index = lp_index,
		.lp.stats_area_type = HV_STATS_AREA_SELF,
	};
	int err;

	err = hv_unmap_stats_page(HV_STATS_OBJECT_LOGICAL_PROCESSOR,
				  mshv_lps_stats[lp_index], &identity);
	if (err)
		pr_err("%s: failed to unmap logical processor %u stats, err: %d\n",
		       __func__, lp_index, err);

	mshv_lps_stats[lp_index] = NULL;
}
/*
 * Map the SELF-area stats page of one logical processor via the
 * hypervisor's stats-page interface and cache it in mshv_lps_stats[].
 *
 * Return: mapped page on success, ERR_PTR(-errno) on failure.
 */
static struct hv_stats_page * __init mshv_lp_stats_map(u32 lp_index)
{
	union hv_stats_object_identity identity = {
		.lp.lp_index = lp_index,
		.lp.stats_area_type = HV_STATS_AREA_SELF,
	};
	struct hv_stats_page *stats;
	int err;

	err = hv_map_stats_page(HV_STATS_OBJECT_LOGICAL_PROCESSOR, &identity,
				&stats);
	if (err) {
		pr_err("%s: failed to map logical processor %u stats, err: %d\n",
		       __func__, lp_index, err);
		return ERR_PTR(err);
	}

	/* Cached so mshv_lp_stats_unmap() can find it later */
	mshv_lps_stats[lp_index] = stats;

	return stats;
}
/*
 * Map an LP's stats page and expose it as a read-only "stats" file under
 * the given debugfs directory. On file-creation failure the page is
 * unmapped again, so the caller only has the directory to clean up.
 *
 * Return: mapped page on success, ERR_PTR(-errno) on failure.
 */
static struct hv_stats_page * __init lp_debugfs_stats_create(u32 lp_index,
							     struct dentry *parent)
{
	struct dentry *dentry;
	struct hv_stats_page *stats;

	stats = mshv_lp_stats_map(lp_index);
	if (IS_ERR(stats))
		return stats;

	dentry = debugfs_create_file("stats", 0400, parent,
				     stats, &lp_stats_fops);
	if (IS_ERR(dentry)) {
		mshv_lp_stats_unmap(lp_index);
		return ERR_CAST(dentry);
	}

	return stats;
}
/*
 * Create the debugfs subtree for one logical processor:
 * <parent>/<lp_index>/stats. On any failure the partially-built
 * directory is removed and an -errno is returned.
 */
static int __init lp_debugfs_create(u32 lp_index, struct dentry *parent)
{
	char dirname[U32_BUF_SZ];
	struct dentry *lp_idx_dir;
	struct hv_stats_page *stats;

	sprintf(dirname, "%u", lp_index);
	lp_idx_dir = debugfs_create_dir(dirname, parent);
	if (IS_ERR(lp_idx_dir))
		return PTR_ERR(lp_idx_dir);

	stats = lp_debugfs_stats_create(lp_index, lp_idx_dir);
	if (!IS_ERR(stats))
		return 0;

	debugfs_remove_recursive(lp_idx_dir);
	return PTR_ERR(stats);
}
/*
 * Tear down the whole "lp" debugfs subtree: remove the files first (so no
 * reader can touch a stats page mid-unmap), then unmap every LP stats
 * page and free the pointer array.
 */
static void mshv_debugfs_lp_remove(void)
{
	int lp_index;

	debugfs_remove_recursive(mshv_debugfs_lp);

	for (lp_index = 0; lp_index < mshv_lps_count; lp_index++)
		mshv_lp_stats_unmap(lp_index);

	kfree(mshv_lps_stats);
	mshv_lps_stats = NULL;
}
/*
 * Build <parent>/lp/<N>/stats for every logical processor,
 * 0 .. mshv_lps_count-1 (count taken from the hypervisor stats page).
 * On failure everything created so far is unwound in reverse order.
 * mshv_debugfs_lp is only published once the whole tree exists.
 */
static int __init mshv_debugfs_lp_create(struct dentry *parent)
{
	struct dentry *lp_dir;
	int err, lp_index;

	mshv_lps_stats = kcalloc(mshv_lps_count,
				 sizeof(*mshv_lps_stats),
				 GFP_KERNEL_ACCOUNT);
	if (!mshv_lps_stats)
		return -ENOMEM;

	lp_dir = debugfs_create_dir("lp", parent);
	if (IS_ERR(lp_dir)) {
		err = PTR_ERR(lp_dir);
		goto free_lp_stats;
	}

	for (lp_index = 0; lp_index < mshv_lps_count; lp_index++) {
		err = lp_debugfs_create(lp_index, lp_dir);
		if (err)
			goto remove_debugfs_lps;
	}

	mshv_debugfs_lp = lp_dir;

	return 0;

remove_debugfs_lps:
	/* The failing index cleaned up after itself; unmap the earlier ones */
	for (lp_index -= 1; lp_index >= 0; lp_index--)
		mshv_lp_stats_unmap(lp_index);
	debugfs_remove_recursive(lp_dir);
free_lp_stats:
	kfree(mshv_lps_stats);
	mshv_lps_stats = NULL;

	return err;
}
/*
 * seq_file show callback for a virtual processor's "stats" file.
 * m->private is a 2-element array of stats pages indexed by
 * HV_STATS_AREA_{SELF,PARENT}.
 */
static int vp_stats_show(struct seq_file *m, void *v)
{
	const struct hv_stats_page **pstats = m->private;
	u64 parent_val, self_val;
	int idx;

	/*
	 * For VP and partition stats, there may be two stats areas mapped,
	 * SELF and PARENT. These refer to the privilege level of the data in
	 * each page. Some fields may be 0 in SELF and nonzero in PARENT, or
	 * vice versa.
	 *
	 * Hence, prioritize printing from the PARENT page (more privileged
	 * data), but use the value from the SELF page if the PARENT value is
	 * 0.
	 */
	for (idx = 0; idx < ARRAY_SIZE(hv_vp_counters); idx++) {
		char *name = hv_vp_counters[idx];

		if (!name)
			continue;
		parent_val = pstats[HV_STATS_AREA_PARENT]->data[idx];
		self_val = pstats[HV_STATS_AREA_SELF]->data[idx];
		seq_printf(m, "%-43s: %llu\n", name,
			   parent_val ? parent_val : self_val);
	}
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(vp_stats);
/*
 * Remove a VP's debugfs subtree. vp_stats is the "stats" file dentry, so
 * its d_parent is the per-VP index directory; removing that recursively
 * takes the stats file with it. Stats pages are NOT unmapped here — the
 * caller owns them (see parent_vp_debugfs_remove()).
 */
static void vp_debugfs_remove(struct dentry *vp_stats)
{
	debugfs_remove_recursive(vp_stats->d_parent);
}
/*
 * Create <parent>/<vp_index>/stats exposing the given SELF/PARENT stats
 * page pair. On success *vp_stats_ptr receives the stats file dentry
 * (vp_debugfs_remove() derives the directory from it); on failure the
 * partially-built directory is removed and an -errno is returned.
 */
static int vp_debugfs_create(u64 partition_id, u32 vp_index,
			     struct hv_stats_page **pstats,
			     struct dentry **vp_stats_ptr,
			     struct dentry *parent)
{
	char dirname[U32_BUF_SZ];
	struct dentry *dir, *stats_file;

	sprintf(dirname, "%u", vp_index);
	dir = debugfs_create_dir(dirname, parent);
	if (IS_ERR(dir))
		return PTR_ERR(dir);

	stats_file = debugfs_create_file("stats", 0400, dir,
					 pstats, &vp_stats_fops);
	if (IS_ERR(stats_file)) {
		debugfs_remove_recursive(dir);
		return PTR_ERR(stats_file);
	}

	*vp_stats_ptr = stats_file;
	return 0;
}
/*
 * seq_file show callback for a partition's "stats" file. Same SELF/PARENT
 * preference scheme as vp_stats_show(): print the PARENT (more
 * privileged) value unless it is 0, then fall back to SELF.
 */
static int partition_stats_show(struct seq_file *m, void *v)
{
	const struct hv_stats_page **pstats = m->private;
	u64 parent_val, self_val;
	int idx;

	for (idx = 0; idx < ARRAY_SIZE(hv_partition_counters); idx++) {
		char *name = hv_partition_counters[idx];

		if (!name)
			continue;
		parent_val = pstats[HV_STATS_AREA_PARENT]->data[idx];
		self_val = pstats[HV_STATS_AREA_SELF]->data[idx];
		seq_printf(m, "%-37s: %llu\n", name,
			   parent_val ? parent_val : self_val);
	}
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(partition_stats);
/*
 * Unmap one stats page (SELF or PARENT area) of the given partition.
 * Best-effort: failure is only logged.
 */
static void mshv_partition_stats_unmap(u64 partition_id,
				       struct hv_stats_page *stats_page,
				       enum hv_stats_area_type stats_area_type)
{
	union hv_stats_object_identity identity = {
		.partition.partition_id = partition_id,
		.partition.stats_area_type = stats_area_type,
	};
	int err;

	err = hv_unmap_stats_page(HV_STATS_OBJECT_PARTITION, stats_page,
				  &identity);
	if (err)
		pr_err("%s: failed to unmap partition %lld %s stats, err: %d\n",
		       __func__, partition_id,
		       (stats_area_type == HV_STATS_AREA_SELF) ? "self" : "parent",
		       err);
}
/*
 * Map one stats page (SELF or PARENT area) of the given partition.
 *
 * Return: mapped page on success, ERR_PTR(-errno) on failure.
 */
static struct hv_stats_page *mshv_partition_stats_map(u64 partition_id,
						      enum hv_stats_area_type stats_area_type)
{
	union hv_stats_object_identity identity = {
		.partition.partition_id = partition_id,
		.partition.stats_area_type = stats_area_type,
	};
	struct hv_stats_page *stats;
	int err;

	err = hv_map_stats_page(HV_STATS_OBJECT_PARTITION, &identity, &stats);
	if (err) {
		pr_err("%s: failed to map partition %lld %s stats, err: %d\n",
		       __func__, partition_id,
		       (stats_area_type == HV_STATS_AREA_SELF) ? "self" : "parent",
		       err);
		return ERR_PTR(err);
	}

	return stats;
}
/*
 * Map a partition's SELF (and, when accessible, PARENT) stats pages and
 * expose them through a single read-only "stats" file whose private data
 * is a kcalloc'd 2-slot page-pointer array (freed by
 * partition_debugfs_remove()). When the PARENT area is unavailable the
 * PARENT slot aliases the SELF page so partition_stats_show() can read
 * both slots unconditionally.
 */
static int mshv_debugfs_partition_stats_create(u64 partition_id,
					       struct dentry **partition_stats_ptr,
					       struct dentry *parent)
{
	struct dentry *dentry;
	struct hv_stats_page **pstats;
	int err;

	pstats = kcalloc(NUM_STATS_AREAS, sizeof(struct hv_stats_page *),
			 GFP_KERNEL_ACCOUNT);
	if (!pstats)
		return -ENOMEM;

	pstats[HV_STATS_AREA_SELF] = mshv_partition_stats_map(partition_id,
							      HV_STATS_AREA_SELF);
	if (IS_ERR(pstats[HV_STATS_AREA_SELF])) {
		err = PTR_ERR(pstats[HV_STATS_AREA_SELF]);
		goto cleanup;
	}

	/*
	 * L1VH partition cannot access its partition stats in parent area.
	 */
	if (is_l1vh_parent(partition_id)) {
		pstats[HV_STATS_AREA_PARENT] = pstats[HV_STATS_AREA_SELF];
	} else {
		pstats[HV_STATS_AREA_PARENT] = mshv_partition_stats_map(partition_id,
									HV_STATS_AREA_PARENT);
		if (IS_ERR(pstats[HV_STATS_AREA_PARENT])) {
			err = PTR_ERR(pstats[HV_STATS_AREA_PARENT]);
			goto unmap_self;
		}
		/*
		 * NOTE(review): mshv_partition_stats_map() appears to return
		 * either a valid page or an ERR_PTR, never NULL — confirm
		 * whether this fallback is reachable.
		 */
		if (!pstats[HV_STATS_AREA_PARENT])
			pstats[HV_STATS_AREA_PARENT] = pstats[HV_STATS_AREA_SELF];
	}

	dentry = debugfs_create_file("stats", 0400, parent,
				     pstats, &partition_stats_fops);
	if (IS_ERR(dentry)) {
		err = PTR_ERR(dentry);
		goto unmap_partition_stats;
	}

	*partition_stats_ptr = dentry;

	return 0;

unmap_partition_stats:
	/* Only unmap PARENT when it is a real mapping, not an alias of SELF */
	if (pstats[HV_STATS_AREA_PARENT] != pstats[HV_STATS_AREA_SELF])
		mshv_partition_stats_unmap(partition_id, pstats[HV_STATS_AREA_PARENT],
					   HV_STATS_AREA_PARENT);
unmap_self:
	mshv_partition_stats_unmap(partition_id, pstats[HV_STATS_AREA_SELF],
				   HV_STATS_AREA_SELF);
cleanup:
	kfree(pstats);

	return err;
}
/*
 * Tear down a partition's debugfs subtree and its stats mappings.
 * dentry is the partition "stats" file; its inode's i_private holds the
 * SELF/PARENT page-pointer array allocated at creation time. The debugfs
 * tree is removed first so no reader can race with the unmap, and the
 * PARENT slot is only unmapped when it is a distinct mapping rather than
 * an alias of SELF.
 */
static void partition_debugfs_remove(u64 partition_id, struct dentry *dentry)
{
	struct hv_stats_page **pstats = dentry->d_inode->i_private;

	debugfs_remove_recursive(dentry->d_parent);

	if (pstats[HV_STATS_AREA_PARENT] != pstats[HV_STATS_AREA_SELF])
		mshv_partition_stats_unmap(partition_id,
					   pstats[HV_STATS_AREA_PARENT],
					   HV_STATS_AREA_PARENT);

	mshv_partition_stats_unmap(partition_id,
				   pstats[HV_STATS_AREA_SELF],
				   HV_STATS_AREA_SELF);

	kfree(pstats);
}
/*
 * Build the debugfs subtree for one partition:
 * <parent>/<id>/{vp/,stats}. An L1VH parent names its own directory
 * "self" instead of the numeric partition id. On success *vp_dir_ptr and
 * *partition_stats_ptr are published; on failure the whole directory
 * (including vp/) is removed recursively.
 */
static int partition_debugfs_create(u64 partition_id,
				    struct dentry **vp_dir_ptr,
				    struct dentry **partition_stats_ptr,
				    struct dentry *parent)
{
	char part_id_str[U64_BUF_SZ];
	struct dentry *part_id_dir, *vp_dir;
	int err;

	if (is_l1vh_parent(partition_id))
		sprintf(part_id_str, "self");
	else
		sprintf(part_id_str, "%llu", partition_id);

	part_id_dir = debugfs_create_dir(part_id_str, parent);
	if (IS_ERR(part_id_dir))
		return PTR_ERR(part_id_dir);

	vp_dir = debugfs_create_dir("vp", part_id_dir);
	if (IS_ERR(vp_dir)) {
		err = PTR_ERR(vp_dir);
		goto remove_debugfs_partition_id;
	}

	err = mshv_debugfs_partition_stats_create(partition_id,
						  partition_stats_ptr,
						  part_id_dir);
	if (err)
		goto remove_debugfs_partition_id;

	*vp_dir_ptr = vp_dir;

	return 0;

remove_debugfs_partition_id:
	/* Removes vp_dir too, since it lives under part_id_dir */
	debugfs_remove_recursive(part_id_dir);

	return err;
}
/*
 * Tear down one of this partition's own VP debugfs entries: remove the
 * debugfs subtree, unmap the VP's SELF/PARENT stats pages, and free the
 * page-pointer array stashed in the stats file's i_private.
 */
static void parent_vp_debugfs_remove(u32 vp_index,
				     struct dentry *vp_stats_ptr)
{
	struct hv_stats_page **pstats;

	pstats = vp_stats_ptr->d_inode->i_private;

	vp_debugfs_remove(vp_stats_ptr);
	mshv_vp_stats_unmap(hv_current_partition_id, vp_index, pstats);
	kfree(pstats);
}
/*
 * Full teardown of this partition's own ("parent") debugfs tree: every
 * online CPU's VP entry, then the partition entry itself.
 *
 * NOTE(review): iterates online CPUs only — assumes the online mask has
 * not shrunk since mshv_debugfs_parent_partition_create(); a CPU
 * hotplugged off in between would leak its entry. Confirm against the
 * driver's hotplug handling.
 */
static void mshv_debugfs_parent_partition_remove(void)
{
	int idx;

	for_each_online_cpu(idx)
		parent_vp_debugfs_remove(hv_vp_index[idx],
					 parent_vp_stats[idx]);

	partition_debugfs_remove(hv_current_partition_id,
				 parent_partition_stats);

	kfree(parent_vp_stats);
	parent_vp_stats = NULL;
	parent_partition_stats = NULL;
}
/*
 * Create the debugfs entry for one of this partition's own VPs: allocate
 * the 2-slot SELF/PARENT page-pointer array, map the VP stats pages, and
 * expose them via vp_debugfs_create(). Cleanup on failure runs in
 * reverse order of acquisition.
 */
static int __init parent_vp_debugfs_create(u32 vp_index,
					   struct dentry **vp_stats_ptr,
					   struct dentry *parent)
{
	struct hv_stats_page **pstats;
	int err;

	pstats = kcalloc(NUM_STATS_AREAS, sizeof(struct hv_stats_page *),
			 GFP_KERNEL_ACCOUNT);
	if (!pstats)
		return -ENOMEM;

	err = mshv_vp_stats_map(hv_current_partition_id, vp_index, pstats);
	if (err)
		goto cleanup;

	err = vp_debugfs_create(hv_current_partition_id, vp_index, pstats,
				vp_stats_ptr, parent);
	if (err)
		goto unmap_vp_stats;

	return 0;

unmap_vp_stats:
	mshv_vp_stats_unmap(hv_current_partition_id, vp_index, pstats);
cleanup:
	kfree(pstats);

	return err;
}
/*
 * Create the "partition" debugfs tree for this (parent) partition: the
 * partition's own directory plus a VP entry for every online CPU.
 *
 * Return: 0 on success, -errno on failure (everything created so far is
 * unwound and the module-level dentries are reset to NULL).
 *
 * Fixes over the previous version:
 *  - The VP-unwind loop passed the raw CPU number to
 *    parent_vp_debugfs_remove(), while creation (and the normal-path
 *    teardown in mshv_debugfs_parent_partition_remove()) use
 *    hv_vp_index[cpu]; the wrong VP index reached mshv_vp_stats_unmap().
 *  - A parent_vp_stats allocation failure jumped straight to removing the
 *    debugfs directory, leaking the partition stats pages and the pstats
 *    array mapped by partition_debugfs_create().
 */
static int __init mshv_debugfs_parent_partition_create(void)
{
	struct dentry *vp_dir;
	int err, idx, i;

	mshv_debugfs_partition = debugfs_create_dir("partition",
						    mshv_debugfs);
	if (IS_ERR(mshv_debugfs_partition))
		return PTR_ERR(mshv_debugfs_partition);

	err = partition_debugfs_create(hv_current_partition_id,
				       &vp_dir,
				       &parent_partition_stats,
				       mshv_debugfs_partition);
	if (err)
		goto remove_debugfs_partition;

	parent_vp_stats = kcalloc(nr_cpu_ids, sizeof(*parent_vp_stats),
				  GFP_KERNEL);
	if (!parent_vp_stats) {
		err = -ENOMEM;
		goto remove_partition_stats;
	}

	for_each_online_cpu(idx) {
		err = parent_vp_debugfs_create(hv_vp_index[idx],
					       &parent_vp_stats[idx],
					       vp_dir);
		if (err)
			goto remove_debugfs_partition_vp;
	}

	return 0;

remove_debugfs_partition_vp:
	/* Unwind the CPUs created before the failing one (ascending order) */
	for_each_online_cpu(i) {
		if (i >= idx)
			break;
		parent_vp_debugfs_remove(hv_vp_index[i], parent_vp_stats[i]);
	}
	kfree(parent_vp_stats);
	parent_vp_stats = NULL;
remove_partition_stats:
	partition_debugfs_remove(hv_current_partition_id,
				 parent_partition_stats);
	parent_partition_stats = NULL;
remove_debugfs_partition:
	debugfs_remove_recursive(mshv_debugfs_partition);
	mshv_debugfs_partition = NULL;

	return err;
}
/*
 * seq_file show callback for the hypervisor-wide "stats" file.
 * m->private is the hypervisor stats page mapped in
 * mshv_debugfs_hv_stats_create().
 */
static int hv_stats_show(struct seq_file *m, void *v)
{
	const struct hv_stats_page *stats = m->private;
	int idx;

	for (idx = 0; idx < ARRAY_SIZE(hv_hypervisor_counters); idx++) {
		char *name = hv_hypervisor_counters[idx];

		if (!name)
			continue;
		seq_printf(m, "%-27s: %llu\n", name, stats->data[idx]);
	}
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(hv_stats);
/*
 * Unmap the hypervisor-wide SELF stats page. The page pointer argument is
 * NULL here (unlike the LP/partition unmaps); the identity alone selects
 * the mapping. Best-effort: failure is only logged.
 */
static void mshv_hv_stats_unmap(void)
{
	union hv_stats_object_identity identity = {
		.hv.stats_area_type = HV_STATS_AREA_SELF,
	};
	int err;

	err = hv_unmap_stats_page(HV_STATS_OBJECT_HYPERVISOR, NULL, &identity);
	if (err)
		pr_err("%s: failed to unmap hypervisor stats: %d\n",
		       __func__, err);
}
/*
 * Map the hypervisor-wide SELF stats page.
 *
 * Return: mapped page (as void *) on success, ERR_PTR(-errno) on failure.
 */
static void * __init mshv_hv_stats_map(void)
{
	union hv_stats_object_identity identity = {
		.hv.stats_area_type = HV_STATS_AREA_SELF,
	};
	struct hv_stats_page *stats;
	int err;

	err = hv_map_stats_page(HV_STATS_OBJECT_HYPERVISOR, &identity, &stats);
	if (err) {
		pr_err("%s: failed to map hypervisor stats: %d\n",
		       __func__, err);
		return ERR_PTR(err);
	}

	return stats;
}
/*
 * Map the hypervisor stats page, expose it as <parent>/stats, and read
 * the logical-processor count out of it for later use by
 * mshv_debugfs_lp_create().
 *
 * NOTE(review): `stats` is typed u64 * although the map returns a
 * struct hv_stats_page *; indexing with
 * HV_HYPERVISOR_COUNTER_LOGICAL_PROCESSORS presumably matches
 * stats->data[] starting at offset 0 — confirm against the
 * hv_stats_page layout.
 */
static int __init mshv_debugfs_hv_stats_create(struct dentry *parent)
{
	struct dentry *dentry;
	u64 *stats;
	int err;

	stats = mshv_hv_stats_map();
	if (IS_ERR(stats))
		return PTR_ERR(stats);

	dentry = debugfs_create_file("stats", 0400, parent,
				     stats, &hv_stats_fops);
	if (IS_ERR(dentry)) {
		err = PTR_ERR(dentry);
		pr_err("%s: failed to create hypervisor stats dentry: %d\n",
		       __func__, err);
		goto unmap_hv_stats;
	}

	mshv_lps_count = stats[HV_HYPERVISOR_COUNTER_LOGICAL_PROCESSORS];

	return 0;

unmap_hv_stats:
	mshv_hv_stats_unmap();

	return err;
}
/*
 * Create the debugfs entry for a child-partition VP under its
 * partition's vp/ directory. A no-op (returning success) when the mshv
 * debugfs root was never created.
 */
int mshv_debugfs_vp_create(struct mshv_vp *vp)
{
	struct mshv_partition *p = vp->vp_partition;

	if (!mshv_debugfs)
		return 0;

	return vp_debugfs_create(p->pt_id, vp->vp_index,
				 vp->vp_stats_pages,
				 &vp->vp_stats_dentry,
				 p->pt_vp_dentry);
}
/*
 * Remove a child-partition VP's debugfs entry. The VP's stats pages are
 * owned elsewhere (vp->vp_stats_pages) and are not unmapped here. No-op
 * when the mshv debugfs root was never created.
 */
void mshv_debugfs_vp_remove(struct mshv_vp *vp)
{
	if (!mshv_debugfs)
		return;

	vp_debugfs_remove(vp->vp_stats_dentry);
}
/*
 * Create the debugfs subtree for a child partition under the "partition"
 * directory, publishing its vp/ directory and stats-file dentries into
 * the mshv_partition. No-op (returning success) when the mshv debugfs
 * root was never created.
 */
int mshv_debugfs_partition_create(struct mshv_partition *partition)
{
	if (!mshv_debugfs)
		return 0;

	return partition_debugfs_create(partition->pt_id,
					&partition->pt_vp_dentry,
					&partition->pt_stats_dentry,
					mshv_debugfs_partition);
}
/*
 * Remove a child partition's debugfs subtree and unmap its stats pages.
 * No-op when the mshv debugfs root was never created.
 */
void mshv_debugfs_partition_remove(struct mshv_partition *partition)
{
	if (!mshv_debugfs)
		return;

	partition_debugfs_remove(partition->pt_id,
				 partition->pt_stats_dentry);
}
/*
 * Create the /sys/kernel/debug/mshv hierarchy.
 *
 * Root partition only: the hypervisor-wide "stats" file and the per-LP
 * "lp/" tree (the LP count comes from the hypervisor stats page, so the
 * ordering is fixed). All partitions: the "partition/" tree. Error
 * labels fall through in reverse creation order; the hv_root_partition()
 * guards keep the non-root unwind from touching root-only state.
 */
int __init mshv_debugfs_init(void)
{
	int err;

	mshv_debugfs = debugfs_create_dir("mshv", NULL);
	if (IS_ERR(mshv_debugfs)) {
		pr_err("%s: failed to create debugfs directory\n", __func__);
		return PTR_ERR(mshv_debugfs);
	}

	if (hv_root_partition()) {
		err = mshv_debugfs_hv_stats_create(mshv_debugfs);
		if (err)
			goto remove_mshv_dir;

		err = mshv_debugfs_lp_create(mshv_debugfs);
		if (err)
			goto unmap_hv_stats;
	}

	err = mshv_debugfs_parent_partition_create();
	if (err)
		goto unmap_lp_stats;

	return 0;

unmap_lp_stats:
	if (hv_root_partition()) {
		mshv_debugfs_lp_remove();
		mshv_debugfs_lp = NULL;
	}
unmap_hv_stats:
	if (hv_root_partition())
		mshv_hv_stats_unmap();
remove_mshv_dir:
	debugfs_remove_recursive(mshv_debugfs);
	mshv_debugfs = NULL;

	return err;
}
/*
 * Tear down the whole /sys/kernel/debug/mshv hierarchy in reverse
 * creation order: partition tree first, then (root only) the LP tree and
 * hypervisor stats page, then the root directory itself.
 */
void mshv_debugfs_exit(void)
{
	mshv_debugfs_parent_partition_remove();

	if (hv_root_partition()) {
		mshv_debugfs_lp_remove();
		mshv_debugfs_lp = NULL;
		mshv_hv_stats_unmap();
	}

	debugfs_remove_recursive(mshv_debugfs);
	mshv_debugfs = NULL;
	mshv_debugfs_partition = NULL;
}

View file

@ -0,0 +1,490 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2026, Microsoft Corporation.
*
* Data for printing stats page counters via debugfs.
*
* Authors: Microsoft Linux virtualization team
*/
/*
* For simplicity, this file is included directly in mshv_debugfs.c.
* If these are ever needed elsewhere they should be compiled separately.
* Ensure this file is not used twice by accident.
*/
#ifndef MSHV_DEBUGFS_C
#error "This file should only be included in mshv_debugfs.c"
#endif
/*
 * HV_HYPERVISOR_COUNTER: hypervisor-wide counter names, indexed by the
 * counter's slot in the hypervisor stats page. Index 0 and gaps (e.g. 9)
 * are unnamed/reserved slots; hv_stats_show() skips NULL entries.
 */
static char *hv_hypervisor_counters[] = {
	[1] = "HvLogicalProcessors",
	[2] = "HvPartitions",
	[3] = "HvTotalPages",
	[4] = "HvVirtualProcessors",
	[5] = "HvMonitoredNotifications",
	[6] = "HvModernStandbyEntries",
	[7] = "HvPlatformIdleTransitions",
	[8] = "HvHypervisorStartupCost",
	[10] = "HvIOSpacePages",
	[11] = "HvNonEssentialPagesForDump",
	[12] = "HvSubsumedPages",
};
/*
 * HV_CPU_COUNTER: per-logical-processor counter names, indexed by the
 * counter's slot in the LP stats page. Index 0 is unnamed/reserved and
 * skipped by lp_stats_show(). The layout diverges between x86_64 and
 * arm64 from slot 31 onward, hence the arch-conditional tails.
 */
static char *hv_lp_counters[] = {
	[1] = "LpGlobalTime",
	[2] = "LpTotalRunTime",
	[3] = "LpHypervisorRunTime",
	[4] = "LpHardwareInterrupts",
	[5] = "LpContextSwitches",
	[6] = "LpInterProcessorInterrupts",
	[7] = "LpSchedulerInterrupts",
	[8] = "LpTimerInterrupts",
	[9] = "LpInterProcessorInterruptsSent",
	[10] = "LpProcessorHalts",
	[11] = "LpMonitorTransitionCost",
	[12] = "LpContextSwitchTime",
	[13] = "LpC1TransitionsCount",
	[14] = "LpC1RunTime",
	[15] = "LpC2TransitionsCount",
	[16] = "LpC2RunTime",
	[17] = "LpC3TransitionsCount",
	[18] = "LpC3RunTime",
	[19] = "LpRootVpIndex",
	[20] = "LpIdleSequenceNumber",
	[21] = "LpGlobalTscCount",
	[22] = "LpActiveTscCount",
	[23] = "LpIdleAccumulation",
	[24] = "LpReferenceCycleCount0",
	[25] = "LpActualCycleCount0",
	[26] = "LpReferenceCycleCount1",
	[27] = "LpActualCycleCount1",
	[28] = "LpProximityDomainId",
	[29] = "LpPostedInterruptNotifications",
	[30] = "LpBranchPredictorFlushes",
#if IS_ENABLED(CONFIG_X86_64)
	[31] = "LpL1DataCacheFlushes",
	[32] = "LpImmediateL1DataCacheFlushes",
	[33] = "LpMbFlushes",
	[34] = "LpCounterRefreshSequenceNumber",
	[35] = "LpCounterRefreshReferenceTime",
	[36] = "LpIdleAccumulationSnapshot",
	[37] = "LpActiveTscCountSnapshot",
	[38] = "LpHwpRequestContextSwitches",
	[39] = "LpPlaceholder1",
	[40] = "LpPlaceholder2",
	[41] = "LpPlaceholder3",
	[42] = "LpPlaceholder4",
	[43] = "LpPlaceholder5",
	[44] = "LpPlaceholder6",
	[45] = "LpPlaceholder7",
	[46] = "LpPlaceholder8",
	[47] = "LpPlaceholder9",
	[48] = "LpSchLocalRunListSize",
	[49] = "LpReserveGroupId",
	[50] = "LpRunningPriority",
	[51] = "LpPerfmonInterruptCount",
#elif IS_ENABLED(CONFIG_ARM64)
	[31] = "LpCounterRefreshSequenceNumber",
	[32] = "LpCounterRefreshReferenceTime",
	[33] = "LpIdleAccumulationSnapshot",
	[34] = "LpActiveTscCountSnapshot",
	[35] = "LpHwpRequestContextSwitches",
	[36] = "LpPlaceholder2",
	[37] = "LpPlaceholder3",
	[38] = "LpPlaceholder4",
	[39] = "LpPlaceholder5",
	[40] = "LpPlaceholder6",
	[41] = "LpPlaceholder7",
	[42] = "LpPlaceholder8",
	[43] = "LpPlaceholder9",
	[44] = "LpSchLocalRunListSize",
	[45] = "LpReserveGroupId",
	[46] = "LpRunningPriority",
#endif
};
/* HV_PROCESS_COUNTER */
/*
 * Human-readable names for the per-partition statistics counters.
 *
 * The array index is the counter ID. ID 2 has no name here and stays
 * NULL (designated initializers zero unmentioned elements); readers must
 * skip NULL slots. Counter IDs 27+ differ by architecture, hence the
 * CONFIG_X86_64 / CONFIG_ARM64 split.
 *
 * NOTE(review): "PtVirtualTlbFlushEntires" and
 * "PtNumberofActiveChildPartitions" look misspelled but are kept
 * verbatim — presumably they must match the established counter names;
 * confirm before ever renaming.
 *
 * Const-qualified: the entries are string literals, so the table belongs
 * in .rodata and must never be written through.
 */
static const char * const hv_partition_counters[] = {
	[1] = "PtVirtualProcessors",
	[3] = "PtTlbSize",
	[4] = "PtAddressSpaces",
	[5] = "PtDepositedPages",
	[6] = "PtGpaPages",
	[7] = "PtGpaSpaceModifications",
	[8] = "PtVirtualTlbFlushEntires",
	[9] = "PtRecommendedTlbSize",
	[10] = "PtGpaPages4K",
	[11] = "PtGpaPages2M",
	[12] = "PtGpaPages1G",
	[13] = "PtGpaPages512G",
	[14] = "PtDevicePages4K",
	[15] = "PtDevicePages2M",
	[16] = "PtDevicePages1G",
	[17] = "PtDevicePages512G",
	[18] = "PtAttachedDevices",
	[19] = "PtDeviceInterruptMappings",
	[20] = "PtIoTlbFlushes",
	[21] = "PtIoTlbFlushCost",
	[22] = "PtDeviceInterruptErrors",
	[23] = "PtDeviceDmaErrors",
	[24] = "PtDeviceInterruptThrottleEvents",
	[25] = "PtSkippedTimerTicks",
	[26] = "PtPartitionId",
#if IS_ENABLED(CONFIG_X86_64)
	[27] = "PtNestedTlbSize",
	[28] = "PtRecommendedNestedTlbSize",
	[29] = "PtNestedTlbFreeListSize",
	[30] = "PtNestedTlbTrimmedPages",
	[31] = "PtPagesShattered",
	[32] = "PtPagesRecombined",
	[33] = "PtHwpRequestValue",
	[34] = "PtAutoSuspendEnableTime",
	[35] = "PtAutoSuspendTriggerTime",
	[36] = "PtAutoSuspendDisableTime",
	[37] = "PtPlaceholder1",
	[38] = "PtPlaceholder2",
	[39] = "PtPlaceholder3",
	[40] = "PtPlaceholder4",
	[41] = "PtPlaceholder5",
	[42] = "PtPlaceholder6",
	[43] = "PtPlaceholder7",
	[44] = "PtPlaceholder8",
	[45] = "PtHypervisorStateTransferGeneration",
	[46] = "PtNumberofActiveChildPartitions",
#elif IS_ENABLED(CONFIG_ARM64)
	[27] = "PtHwpRequestValue",
	[28] = "PtAutoSuspendEnableTime",
	[29] = "PtAutoSuspendTriggerTime",
	[30] = "PtAutoSuspendDisableTime",
	[31] = "PtPlaceholder1",
	[32] = "PtPlaceholder2",
	[33] = "PtPlaceholder3",
	[34] = "PtPlaceholder4",
	[35] = "PtPlaceholder5",
	[36] = "PtPlaceholder6",
	[37] = "PtPlaceholder7",
	[38] = "PtPlaceholder8",
	[39] = "PtHypervisorStateTransferGeneration",
	[40] = "PtNumberofActiveChildPartitions",
#endif
};
/* HV_THREAD_COUNTER */
/*
 * Human-readable names for the per-virtual-processor statistics counters.
 *
 * The array index is the counter ID. ID 6 has no name here and stays
 * NULL (designated initializers zero unmentioned elements); readers must
 * skip NULL slots. Counter IDs 9+ differ by architecture, hence the
 * CONFIG_X86_64 / CONFIG_ARM64 split.
 *
 * Const-qualified: the entries are string literals, so the table belongs
 * in .rodata and must never be written through.
 */
static const char * const hv_vp_counters[] = {
	[1] = "VpTotalRunTime",
	[2] = "VpHypervisorRunTime",
	[3] = "VpRemoteNodeRunTime",
	[4] = "VpNormalizedRunTime",
	[5] = "VpIdealCpu",
	[7] = "VpHypercallsCount",
	[8] = "VpHypercallsTime",
#if IS_ENABLED(CONFIG_X86_64)
	[9] = "VpPageInvalidationsCount",
	[10] = "VpPageInvalidationsTime",
	[11] = "VpControlRegisterAccessesCount",
	[12] = "VpControlRegisterAccessesTime",
	[13] = "VpIoInstructionsCount",
	[14] = "VpIoInstructionsTime",
	[15] = "VpHltInstructionsCount",
	[16] = "VpHltInstructionsTime",
	[17] = "VpMwaitInstructionsCount",
	[18] = "VpMwaitInstructionsTime",
	[19] = "VpCpuidInstructionsCount",
	[20] = "VpCpuidInstructionsTime",
	[21] = "VpMsrAccessesCount",
	[22] = "VpMsrAccessesTime",
	[23] = "VpOtherInterceptsCount",
	[24] = "VpOtherInterceptsTime",
	[25] = "VpExternalInterruptsCount",
	[26] = "VpExternalInterruptsTime",
	[27] = "VpPendingInterruptsCount",
	[28] = "VpPendingInterruptsTime",
	[29] = "VpEmulatedInstructionsCount",
	[30] = "VpEmulatedInstructionsTime",
	[31] = "VpDebugRegisterAccessesCount",
	[32] = "VpDebugRegisterAccessesTime",
	[33] = "VpPageFaultInterceptsCount",
	[34] = "VpPageFaultInterceptsTime",
	[35] = "VpGuestPageTableMaps",
	[36] = "VpLargePageTlbFills",
	[37] = "VpSmallPageTlbFills",
	[38] = "VpReflectedGuestPageFaults",
	[39] = "VpApicMmioAccesses",
	[40] = "VpIoInterceptMessages",
	[41] = "VpMemoryInterceptMessages",
	[42] = "VpApicEoiAccesses",
	[43] = "VpOtherMessages",
	[44] = "VpPageTableAllocations",
	[45] = "VpLogicalProcessorMigrations",
	[46] = "VpAddressSpaceEvictions",
	[47] = "VpAddressSpaceSwitches",
	[48] = "VpAddressDomainFlushes",
	[49] = "VpAddressSpaceFlushes",
	[50] = "VpGlobalGvaRangeFlushes",
	[51] = "VpLocalGvaRangeFlushes",
	[52] = "VpPageTableEvictions",
	[53] = "VpPageTableReclamations",
	[54] = "VpPageTableResets",
	[55] = "VpPageTableValidations",
	[56] = "VpApicTprAccesses",
	[57] = "VpPageTableWriteIntercepts",
	[58] = "VpSyntheticInterrupts",
	[59] = "VpVirtualInterrupts",
	[60] = "VpApicIpisSent",
	[61] = "VpApicSelfIpisSent",
	[62] = "VpGpaSpaceHypercalls",
	[63] = "VpLogicalProcessorHypercalls",
	[64] = "VpLongSpinWaitHypercalls",
	[65] = "VpOtherHypercalls",
	[66] = "VpSyntheticInterruptHypercalls",
	[67] = "VpVirtualInterruptHypercalls",
	[68] = "VpVirtualMmuHypercalls",
	[69] = "VpVirtualProcessorHypercalls",
	[70] = "VpHardwareInterrupts",
	[71] = "VpNestedPageFaultInterceptsCount",
	[72] = "VpNestedPageFaultInterceptsTime",
	[73] = "VpPageScans",
	[74] = "VpLogicalProcessorDispatches",
	[75] = "VpWaitingForCpuTime",
	[76] = "VpExtendedHypercalls",
	[77] = "VpExtendedHypercallInterceptMessages",
	[78] = "VpMbecNestedPageTableSwitches",
	[79] = "VpOtherReflectedGuestExceptions",
	[80] = "VpGlobalIoTlbFlushes",
	[81] = "VpGlobalIoTlbFlushCost",
	[82] = "VpLocalIoTlbFlushes",
	[83] = "VpLocalIoTlbFlushCost",
	[84] = "VpHypercallsForwardedCount",
	[85] = "VpHypercallsForwardingTime",
	[86] = "VpPageInvalidationsForwardedCount",
	[87] = "VpPageInvalidationsForwardingTime",
	[88] = "VpControlRegisterAccessesForwardedCount",
	[89] = "VpControlRegisterAccessesForwardingTime",
	[90] = "VpIoInstructionsForwardedCount",
	[91] = "VpIoInstructionsForwardingTime",
	[92] = "VpHltInstructionsForwardedCount",
	[93] = "VpHltInstructionsForwardingTime",
	[94] = "VpMwaitInstructionsForwardedCount",
	[95] = "VpMwaitInstructionsForwardingTime",
	[96] = "VpCpuidInstructionsForwardedCount",
	[97] = "VpCpuidInstructionsForwardingTime",
	[98] = "VpMsrAccessesForwardedCount",
	[99] = "VpMsrAccessesForwardingTime",
	[100] = "VpOtherInterceptsForwardedCount",
	[101] = "VpOtherInterceptsForwardingTime",
	[102] = "VpExternalInterruptsForwardedCount",
	[103] = "VpExternalInterruptsForwardingTime",
	[104] = "VpPendingInterruptsForwardedCount",
	[105] = "VpPendingInterruptsForwardingTime",
	[106] = "VpEmulatedInstructionsForwardedCount",
	[107] = "VpEmulatedInstructionsForwardingTime",
	[108] = "VpDebugRegisterAccessesForwardedCount",
	[109] = "VpDebugRegisterAccessesForwardingTime",
	[110] = "VpPageFaultInterceptsForwardedCount",
	[111] = "VpPageFaultInterceptsForwardingTime",
	[112] = "VpVmclearEmulationCount",
	[113] = "VpVmclearEmulationTime",
	[114] = "VpVmptrldEmulationCount",
	[115] = "VpVmptrldEmulationTime",
	[116] = "VpVmptrstEmulationCount",
	[117] = "VpVmptrstEmulationTime",
	[118] = "VpVmreadEmulationCount",
	[119] = "VpVmreadEmulationTime",
	[120] = "VpVmwriteEmulationCount",
	[121] = "VpVmwriteEmulationTime",
	[122] = "VpVmxoffEmulationCount",
	[123] = "VpVmxoffEmulationTime",
	[124] = "VpVmxonEmulationCount",
	[125] = "VpVmxonEmulationTime",
	[126] = "VpNestedVMEntriesCount",
	[127] = "VpNestedVMEntriesTime",
	[128] = "VpNestedSLATSoftPageFaultsCount",
	[129] = "VpNestedSLATSoftPageFaultsTime",
	[130] = "VpNestedSLATHardPageFaultsCount",
	[131] = "VpNestedSLATHardPageFaultsTime",
	[132] = "VpInvEptAllContextEmulationCount",
	[133] = "VpInvEptAllContextEmulationTime",
	[134] = "VpInvEptSingleContextEmulationCount",
	[135] = "VpInvEptSingleContextEmulationTime",
	[136] = "VpInvVpidAllContextEmulationCount",
	[137] = "VpInvVpidAllContextEmulationTime",
	[138] = "VpInvVpidSingleContextEmulationCount",
	[139] = "VpInvVpidSingleContextEmulationTime",
	[140] = "VpInvVpidSingleAddressEmulationCount",
	[141] = "VpInvVpidSingleAddressEmulationTime",
	[142] = "VpNestedTlbPageTableReclamations",
	[143] = "VpNestedTlbPageTableEvictions",
	[144] = "VpFlushGuestPhysicalAddressSpaceHypercalls",
	[145] = "VpFlushGuestPhysicalAddressListHypercalls",
	[146] = "VpPostedInterruptNotifications",
	[147] = "VpPostedInterruptScans",
	[148] = "VpTotalCoreRunTime",
	[149] = "VpMaximumRunTime",
	[150] = "VpHwpRequestContextSwitches",
	[151] = "VpWaitingForCpuTimeBucket0",
	[152] = "VpWaitingForCpuTimeBucket1",
	[153] = "VpWaitingForCpuTimeBucket2",
	[154] = "VpWaitingForCpuTimeBucket3",
	[155] = "VpWaitingForCpuTimeBucket4",
	[156] = "VpWaitingForCpuTimeBucket5",
	[157] = "VpWaitingForCpuTimeBucket6",
	[158] = "VpVmloadEmulationCount",
	[159] = "VpVmloadEmulationTime",
	[160] = "VpVmsaveEmulationCount",
	[161] = "VpVmsaveEmulationTime",
	[162] = "VpGifInstructionEmulationCount",
	[163] = "VpGifInstructionEmulationTime",
	[164] = "VpEmulatedErrataSvmInstructions",
	[165] = "VpPlaceholder1",
	[166] = "VpPlaceholder2",
	[167] = "VpPlaceholder3",
	[168] = "VpPlaceholder4",
	[169] = "VpPlaceholder5",
	[170] = "VpPlaceholder6",
	[171] = "VpPlaceholder7",
	[172] = "VpPlaceholder8",
	[173] = "VpContentionTime",
	[174] = "VpWakeUpTime",
	[175] = "VpSchedulingPriority",
	[176] = "VpRdpmcInstructionsCount",
	[177] = "VpRdpmcInstructionsTime",
	[178] = "VpPerfmonPmuMsrAccessesCount",
	[179] = "VpPerfmonLbrMsrAccessesCount",
	[180] = "VpPerfmonIptMsrAccessesCount",
	[181] = "VpPerfmonInterruptCount",
	[182] = "VpVtl1DispatchCount",
	[183] = "VpVtl2DispatchCount",
	[184] = "VpVtl2DispatchBucket0",
	[185] = "VpVtl2DispatchBucket1",
	[186] = "VpVtl2DispatchBucket2",
	[187] = "VpVtl2DispatchBucket3",
	[188] = "VpVtl2DispatchBucket4",
	[189] = "VpVtl2DispatchBucket5",
	[190] = "VpVtl2DispatchBucket6",
	[191] = "VpVtl1RunTime",
	[192] = "VpVtl2RunTime",
	[193] = "VpIommuHypercalls",
	[194] = "VpCpuGroupHypercalls",
	[195] = "VpVsmHypercalls",
	[196] = "VpEventLogHypercalls",
	[197] = "VpDeviceDomainHypercalls",
	[198] = "VpDepositHypercalls",
	[199] = "VpSvmHypercalls",
	[200] = "VpBusLockAcquisitionCount",
	[201] = "VpLoadAvg",
	[202] = "VpRootDispatchThreadBlocked",
	[203] = "VpIdleCpuTime",
	[204] = "VpWaitingForCpuTimeBucket7",
	[205] = "VpWaitingForCpuTimeBucket8",
	[206] = "VpWaitingForCpuTimeBucket9",
	[207] = "VpWaitingForCpuTimeBucket10",
	[208] = "VpWaitingForCpuTimeBucket11",
	[209] = "VpWaitingForCpuTimeBucket12",
	[210] = "VpHierarchicalSuspendTime",
	[211] = "VpExpressSchedulingAttempts",
	[212] = "VpExpressSchedulingCount",
#elif IS_ENABLED(CONFIG_ARM64)
	[9] = "VpSysRegAccessesCount",
	[10] = "VpSysRegAccessesTime",
	[11] = "VpSmcInstructionsCount",
	[12] = "VpSmcInstructionsTime",
	[13] = "VpOtherInterceptsCount",
	[14] = "VpOtherInterceptsTime",
	[15] = "VpExternalInterruptsCount",
	[16] = "VpExternalInterruptsTime",
	[17] = "VpPendingInterruptsCount",
	[18] = "VpPendingInterruptsTime",
	[19] = "VpGuestPageTableMaps",
	[20] = "VpLargePageTlbFills",
	[21] = "VpSmallPageTlbFills",
	[22] = "VpReflectedGuestPageFaults",
	[23] = "VpMemoryInterceptMessages",
	[24] = "VpOtherMessages",
	[25] = "VpLogicalProcessorMigrations",
	[26] = "VpAddressDomainFlushes",
	[27] = "VpAddressSpaceFlushes",
	[28] = "VpSyntheticInterrupts",
	[29] = "VpVirtualInterrupts",
	[30] = "VpApicSelfIpisSent",
	[31] = "VpGpaSpaceHypercalls",
	[32] = "VpLogicalProcessorHypercalls",
	[33] = "VpLongSpinWaitHypercalls",
	[34] = "VpOtherHypercalls",
	[35] = "VpSyntheticInterruptHypercalls",
	[36] = "VpVirtualInterruptHypercalls",
	[37] = "VpVirtualMmuHypercalls",
	[38] = "VpVirtualProcessorHypercalls",
	[39] = "VpHardwareInterrupts",
	[40] = "VpNestedPageFaultInterceptsCount",
	[41] = "VpNestedPageFaultInterceptsTime",
	[42] = "VpLogicalProcessorDispatches",
	[43] = "VpWaitingForCpuTime",
	[44] = "VpExtendedHypercalls",
	[45] = "VpExtendedHypercallInterceptMessages",
	[46] = "VpMbecNestedPageTableSwitches",
	[47] = "VpOtherReflectedGuestExceptions",
	[48] = "VpGlobalIoTlbFlushes",
	[49] = "VpGlobalIoTlbFlushCost",
	[50] = "VpLocalIoTlbFlushes",
	[51] = "VpLocalIoTlbFlushCost",
	[52] = "VpFlushGuestPhysicalAddressSpaceHypercalls",
	[53] = "VpFlushGuestPhysicalAddressListHypercalls",
	[54] = "VpPostedInterruptNotifications",
	[55] = "VpPostedInterruptScans",
	[56] = "VpTotalCoreRunTime",
	[57] = "VpMaximumRunTime",
	[58] = "VpWaitingForCpuTimeBucket0",
	[59] = "VpWaitingForCpuTimeBucket1",
	[60] = "VpWaitingForCpuTimeBucket2",
	[61] = "VpWaitingForCpuTimeBucket3",
	[62] = "VpWaitingForCpuTimeBucket4",
	[63] = "VpWaitingForCpuTimeBucket5",
	[64] = "VpWaitingForCpuTimeBucket6",
	[65] = "VpHwpRequestContextSwitches",
	[66] = "VpPlaceholder2",
	[67] = "VpPlaceholder3",
	[68] = "VpPlaceholder4",
	[69] = "VpPlaceholder5",
	[70] = "VpPlaceholder6",
	[71] = "VpPlaceholder7",
	[72] = "VpPlaceholder8",
	[73] = "VpContentionTime",
	[74] = "VpWakeUpTime",
	[75] = "VpSchedulingPriority",
	[76] = "VpVtl1DispatchCount",
	[77] = "VpVtl2DispatchCount",
	[78] = "VpVtl2DispatchBucket0",
	[79] = "VpVtl2DispatchBucket1",
	[80] = "VpVtl2DispatchBucket2",
	[81] = "VpVtl2DispatchBucket3",
	[82] = "VpVtl2DispatchBucket4",
	[83] = "VpVtl2DispatchBucket5",
	[84] = "VpVtl2DispatchBucket6",
	[85] = "VpVtl1RunTime",
	[86] = "VpVtl2RunTime",
	[87] = "VpIommuHypercalls",
	[88] = "VpCpuGroupHypercalls",
	[89] = "VpVsmHypercalls",
	[90] = "VpEventLogHypercalls",
	[91] = "VpDeviceDomainHypercalls",
	[92] = "VpDepositHypercalls",
	[93] = "VpSvmHypercalls",
	[94] = "VpLoadAvg",
	[95] = "VpRootDispatchThreadBlocked",
	[96] = "VpIdleCpuTime",
	[97] = "VpWaitingForCpuTimeBucket7",
	[98] = "VpWaitingForCpuTimeBucket8",
	[99] = "VpWaitingForCpuTimeBucket9",
	[100] = "VpWaitingForCpuTimeBucket10",
	[101] = "VpWaitingForCpuTimeBucket11",
	[102] = "VpWaitingForCpuTimeBucket12",
	[103] = "VpHierarchicalSuspendTime",
	[104] = "VpExpressSchedulingAttempts",
	[105] = "VpExpressSchedulingCount",
#endif
};

View file

@ -87,8 +87,9 @@ static void mshv_irqfd_resampler_ack(struct mshv_irq_ack_notifier *mian)
idx = srcu_read_lock(&partition->pt_irq_srcu);
hlist_for_each_entry_rcu(irqfd, &resampler->rsmplr_irqfd_list,
irqfd_resampler_hnode) {
hlist_for_each_entry_srcu(irqfd, &resampler->rsmplr_irqfd_list,
irqfd_resampler_hnode,
srcu_read_lock_held(&partition->pt_irq_srcu)) {
if (hv_should_clear_interrupt(irqfd->irqfd_lapic_irq.lapic_control.interrupt_type))
hv_call_clear_virtual_interrupt(partition->pt_id);
@ -128,8 +129,8 @@ static int mshv_vp_irq_try_set_vector(struct mshv_vp *vp, u32 vector)
new_iv.vector[new_iv.vector_count++] = vector;
if (cmpxchg(&vp->vp_register_page->interrupt_vectors.as_uint64,
iv.as_uint64, new_iv.as_uint64) != iv.as_uint64)
if (!try_cmpxchg(&vp->vp_register_page->interrupt_vectors.as_uint64,
&iv.as_uint64, new_iv.as_uint64))
return -EAGAIN;
return 0;
@ -247,12 +248,13 @@ static void mshv_irqfd_shutdown(struct work_struct *work)
{
struct mshv_irqfd *irqfd =
container_of(work, struct mshv_irqfd, irqfd_shutdown);
u64 cnt;
/*
* Synchronize with the wait-queue and unhook ourselves to prevent
* further events.
*/
remove_wait_queue(irqfd->irqfd_wqh, &irqfd->irqfd_wait);
eventfd_ctx_remove_wait_queue(irqfd->irqfd_eventfd_ctx, &irqfd->irqfd_wait, &cnt);
if (irqfd->irqfd_resampler) {
mshv_irqfd_resampler_shutdown(irqfd);
@ -295,13 +297,13 @@ static int mshv_irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode,
{
struct mshv_irqfd *irqfd = container_of(wait, struct mshv_irqfd,
irqfd_wait);
unsigned long flags = (unsigned long)key;
__poll_t flags = key_to_poll(key);
int idx;
unsigned int seq;
struct mshv_partition *pt = irqfd->irqfd_partn;
int ret = 0;
if (flags & POLLIN) {
if (flags & EPOLLIN) {
u64 cnt;
eventfd_ctx_do_read(irqfd->irqfd_eventfd_ctx, &cnt);
@ -320,7 +322,7 @@ static int mshv_irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode,
ret = 1;
}
if (flags & POLLHUP) {
if (flags & EPOLLHUP) {
/* The eventfd is closing, detach from the partition */
unsigned long flags;
@ -371,8 +373,6 @@ static void mshv_irqfd_queue_proc(struct file *file, wait_queue_head_t *wqh,
struct mshv_irqfd *irqfd =
container_of(polltbl, struct mshv_irqfd, irqfd_polltbl);
irqfd->irqfd_wqh = wqh;
/*
* TODO: Ensure there isn't already an exclusive, priority waiter, e.g.
* that the irqfd isn't already bound to another partition. Only the
@ -506,7 +506,7 @@ static int mshv_irqfd_assign(struct mshv_partition *pt,
*/
events = vfs_poll(fd_file(f), &irqfd->irqfd_polltbl);
if (events & POLLIN)
if (events & EPOLLIN)
mshv_assert_irq_slow(irqfd);
srcu_read_unlock(&pt->pt_irq_srcu, idx);

View file

@ -32,7 +32,6 @@ struct mshv_irqfd {
struct mshv_lapic_irq irqfd_lapic_irq;
struct hlist_node irqfd_hnode;
poll_table irqfd_polltbl;
wait_queue_head_t *irqfd_wqh;
wait_queue_entry_t irqfd_wait;
struct work_struct irqfd_shutdown;
struct mshv_irqfd_resampler *irqfd_resampler;

View file

@ -88,7 +88,7 @@ static long mshv_region_process_chunk(struct mshv_mem_region *region,
struct page *page;
int stride, ret;
page = region->pages[page_offset];
page = region->mreg_pages[page_offset];
if (!page)
return -EINVAL;
@ -98,7 +98,7 @@ static long mshv_region_process_chunk(struct mshv_mem_region *region,
/* Start at stride since the first stride is validated */
for (count = stride; count < page_count; count += stride) {
page = region->pages[page_offset + count];
page = region->mreg_pages[page_offset + count];
/* Break if current page is not present */
if (!page)
@ -152,7 +152,7 @@ static int mshv_region_process_range(struct mshv_mem_region *region,
while (page_count) {
/* Skip non-present pages */
if (!region->pages[page_offset]) {
if (!region->mreg_pages[page_offset]) {
page_offset++;
page_count--;
continue;
@ -190,7 +190,7 @@ struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pages,
if (flags & BIT(MSHV_SET_MEM_BIT_EXECUTABLE))
region->hv_map_flags |= HV_MAP_GPA_EXECUTABLE;
kref_init(&region->refcount);
kref_init(&region->mreg_refcount);
return region;
}
@ -204,7 +204,7 @@ static int mshv_region_chunk_share(struct mshv_mem_region *region,
flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE;
return hv_call_modify_spa_host_access(region->partition->pt_id,
region->pages + page_offset,
region->mreg_pages + page_offset,
page_count,
HV_MAP_GPA_READABLE |
HV_MAP_GPA_WRITABLE,
@ -229,7 +229,7 @@ static int mshv_region_chunk_unshare(struct mshv_mem_region *region,
flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE;
return hv_call_modify_spa_host_access(region->partition->pt_id,
region->pages + page_offset,
region->mreg_pages + page_offset,
page_count, 0,
flags, false);
}
@ -254,7 +254,7 @@ static int mshv_region_chunk_remap(struct mshv_mem_region *region,
return hv_call_map_gpa_pages(region->partition->pt_id,
region->start_gfn + page_offset,
page_count, flags,
region->pages + page_offset);
region->mreg_pages + page_offset);
}
static int mshv_region_remap_pages(struct mshv_mem_region *region,
@ -277,10 +277,10 @@ int mshv_region_map(struct mshv_mem_region *region)
static void mshv_region_invalidate_pages(struct mshv_mem_region *region,
u64 page_offset, u64 page_count)
{
if (region->type == MSHV_REGION_TYPE_MEM_PINNED)
unpin_user_pages(region->pages + page_offset, page_count);
if (region->mreg_type == MSHV_REGION_TYPE_MEM_PINNED)
unpin_user_pages(region->mreg_pages + page_offset, page_count);
memset(region->pages + page_offset, 0,
memset(region->mreg_pages + page_offset, 0,
page_count * sizeof(struct page *));
}
@ -297,7 +297,7 @@ int mshv_region_pin(struct mshv_mem_region *region)
int ret;
for (done_count = 0; done_count < region->nr_pages; done_count += ret) {
pages = region->pages + done_count;
pages = region->mreg_pages + done_count;
userspace_addr = region->start_uaddr +
done_count * HV_HYP_PAGE_SIZE;
nr_pages = min(region->nr_pages - done_count,
@ -348,11 +348,11 @@ static int mshv_region_unmap(struct mshv_mem_region *region)
static void mshv_region_destroy(struct kref *ref)
{
struct mshv_mem_region *region =
container_of(ref, struct mshv_mem_region, refcount);
container_of(ref, struct mshv_mem_region, mreg_refcount);
struct mshv_partition *partition = region->partition;
int ret;
if (region->type == MSHV_REGION_TYPE_MEM_MOVABLE)
if (region->mreg_type == MSHV_REGION_TYPE_MEM_MOVABLE)
mshv_region_movable_fini(region);
if (mshv_partition_encrypted(partition)) {
@ -374,12 +374,12 @@ static void mshv_region_destroy(struct kref *ref)
void mshv_region_put(struct mshv_mem_region *region)
{
kref_put(&region->refcount, mshv_region_destroy);
kref_put(&region->mreg_refcount, mshv_region_destroy);
}
int mshv_region_get(struct mshv_mem_region *region)
{
return kref_get_unless_zero(&region->refcount);
return kref_get_unless_zero(&region->mreg_refcount);
}
/**
@ -405,16 +405,16 @@ static int mshv_region_hmm_fault_and_lock(struct mshv_mem_region *region,
int ret;
range->notifier_seq = mmu_interval_read_begin(range->notifier);
mmap_read_lock(region->mni.mm);
mmap_read_lock(region->mreg_mni.mm);
ret = hmm_range_fault(range);
mmap_read_unlock(region->mni.mm);
mmap_read_unlock(region->mreg_mni.mm);
if (ret)
return ret;
mutex_lock(&region->mutex);
mutex_lock(&region->mreg_mutex);
if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) {
mutex_unlock(&region->mutex);
mutex_unlock(&region->mreg_mutex);
cond_resched();
return -EBUSY;
}
@ -438,7 +438,7 @@ static int mshv_region_range_fault(struct mshv_mem_region *region,
u64 page_offset, u64 page_count)
{
struct hmm_range range = {
.notifier = &region->mni,
.notifier = &region->mreg_mni,
.default_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE,
};
unsigned long *pfns;
@ -461,12 +461,12 @@ static int mshv_region_range_fault(struct mshv_mem_region *region,
goto out;
for (i = 0; i < page_count; i++)
region->pages[page_offset + i] = hmm_pfn_to_page(pfns[i]);
region->mreg_pages[page_offset + i] = hmm_pfn_to_page(pfns[i]);
ret = mshv_region_remap_pages(region, region->hv_map_flags,
page_offset, page_count);
mutex_unlock(&region->mutex);
mutex_unlock(&region->mreg_mutex);
out:
kfree(pfns);
return ret;
@ -520,7 +520,7 @@ static bool mshv_region_interval_invalidate(struct mmu_interval_notifier *mni,
{
struct mshv_mem_region *region = container_of(mni,
struct mshv_mem_region,
mni);
mreg_mni);
u64 page_offset, page_count;
unsigned long mstart, mend;
int ret = -EPERM;
@ -533,8 +533,8 @@ static bool mshv_region_interval_invalidate(struct mmu_interval_notifier *mni,
page_count = HVPFN_DOWN(mend - mstart);
if (mmu_notifier_range_blockable(range))
mutex_lock(&region->mutex);
else if (!mutex_trylock(&region->mutex))
mutex_lock(&region->mreg_mutex);
else if (!mutex_trylock(&region->mreg_mutex))
goto out_fail;
mmu_interval_set_seq(mni, cur_seq);
@ -546,12 +546,12 @@ static bool mshv_region_interval_invalidate(struct mmu_interval_notifier *mni,
mshv_region_invalidate_pages(region, page_offset, page_count);
mutex_unlock(&region->mutex);
mutex_unlock(&region->mreg_mutex);
return true;
out_unlock:
mutex_unlock(&region->mutex);
mutex_unlock(&region->mreg_mutex);
out_fail:
WARN_ONCE(ret,
"Failed to invalidate region %#llx-%#llx (range %#lx-%#lx, event: %u, pages %#llx-%#llx, mm: %#llx): %d\n",
@ -568,21 +568,21 @@ static const struct mmu_interval_notifier_ops mshv_region_mni_ops = {
void mshv_region_movable_fini(struct mshv_mem_region *region)
{
mmu_interval_notifier_remove(&region->mni);
mmu_interval_notifier_remove(&region->mreg_mni);
}
bool mshv_region_movable_init(struct mshv_mem_region *region)
{
int ret;
ret = mmu_interval_notifier_insert(&region->mni, current->mm,
ret = mmu_interval_notifier_insert(&region->mreg_mni, current->mm,
region->start_uaddr,
region->nr_pages << HV_HYP_PAGE_SHIFT,
&mshv_region_mni_ops);
if (ret)
return false;
mutex_init(&region->mutex);
mutex_init(&region->mreg_mutex);
return true;
}

View file

@ -52,6 +52,9 @@ struct mshv_vp {
unsigned int kicked_by_hv;
wait_queue_head_t vp_suspend_queue;
} run;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct dentry *vp_stats_dentry;
#endif
};
#define vp_fmt(fmt) "p%lluvp%u: " fmt
@ -79,16 +82,16 @@ enum mshv_region_type {
struct mshv_mem_region {
struct hlist_node hnode;
struct kref refcount;
struct kref mreg_refcount;
u64 nr_pages;
u64 start_gfn;
u64 start_uaddr;
u32 hv_map_flags;
struct mshv_partition *partition;
enum mshv_region_type type;
struct mmu_interval_notifier mni;
struct mutex mutex; /* protects region pages remapping */
struct page *pages[];
enum mshv_region_type mreg_type;
struct mmu_interval_notifier mreg_mni;
struct mutex mreg_mutex; /* protects region pages remapping */
struct page *mreg_pages[];
};
struct mshv_irq_ack_notifier {
@ -136,6 +139,10 @@ struct mshv_partition {
u64 isolation_type;
bool import_completed;
bool pt_initialized;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct dentry *pt_stats_dentry;
struct dentry *pt_vp_dentry;
#endif
};
#define pt_fmt(fmt) "p%llu: " fmt
@ -254,6 +261,16 @@ struct mshv_partition *mshv_partition_get(struct mshv_partition *partition);
void mshv_partition_put(struct mshv_partition *partition);
struct mshv_partition *mshv_partition_find(u64 partition_id) __must_hold(RCU);
static inline bool is_l1vh_parent(u64 partition_id)
{
return hv_l1vh_partition() && (partition_id == HV_PARTITION_ID_SELF);
}
int mshv_vp_stats_map(u64 partition_id, u32 vp_index,
struct hv_stats_page **stats_pages);
void mshv_vp_stats_unmap(u64 partition_id, u32 vp_index,
struct hv_stats_page **stats_pages);
/* hypercalls */
int hv_call_withdraw_memory(u64 count, int node, u64 partition_id);
@ -307,8 +324,9 @@ int hv_call_disconnect_port(u64 connection_partition_id,
int hv_call_notify_port_ring_empty(u32 sint_index);
int hv_map_stats_page(enum hv_stats_object_type type,
const union hv_stats_object_identity *identity,
void **addr);
int hv_unmap_stats_page(enum hv_stats_object_type type, void *page_addr,
struct hv_stats_page **addr);
int hv_unmap_stats_page(enum hv_stats_object_type type,
struct hv_stats_page *page_addr,
const union hv_stats_object_identity *identity);
int hv_call_modify_spa_host_access(u64 partition_id, struct page **pages,
u64 page_struct_count, u32 host_access,
@ -316,6 +334,33 @@ int hv_call_modify_spa_host_access(u64 partition_id, struct page **pages,
int hv_call_get_partition_property_ex(u64 partition_id, u64 property_code, u64 arg,
void *property_value, size_t property_value_sz);
#if IS_ENABLED(CONFIG_DEBUG_FS)
int __init mshv_debugfs_init(void);
void mshv_debugfs_exit(void);
int mshv_debugfs_partition_create(struct mshv_partition *partition);
void mshv_debugfs_partition_remove(struct mshv_partition *partition);
int mshv_debugfs_vp_create(struct mshv_vp *vp);
void mshv_debugfs_vp_remove(struct mshv_vp *vp);
#else
static inline int __init mshv_debugfs_init(void)
{
return 0;
}
static inline void mshv_debugfs_exit(void) { }
static inline int mshv_debugfs_partition_create(struct mshv_partition *partition)
{
return 0;
}
static inline void mshv_debugfs_partition_remove(struct mshv_partition *partition) { }
static inline int mshv_debugfs_vp_create(struct mshv_vp *vp)
{
return 0;
}
static inline void mshv_debugfs_vp_remove(struct mshv_vp *vp) { }
#endif
extern struct mshv_root mshv_root;
extern enum hv_scheduler_type hv_scheduler_type;
extern u8 * __percpu *hv_synic_eventring_tail;

View file

@ -115,7 +115,7 @@ int hv_call_create_partition(u64 flags,
status = hv_do_hypercall(HVCALL_CREATE_PARTITION,
input, output);
if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
if (!hv_result_needs_memory(status)) {
if (hv_result_success(status))
*partition_id = output->partition_id;
local_irq_restore(irq_flags);
@ -123,8 +123,7 @@ int hv_call_create_partition(u64 flags,
break;
}
local_irq_restore(irq_flags);
ret = hv_call_deposit_pages(NUMA_NO_NODE,
hv_current_partition_id, 1);
ret = hv_deposit_memory(hv_current_partition_id, status);
} while (!ret);
return ret;
@ -147,11 +146,11 @@ int hv_call_initialize_partition(u64 partition_id)
status = hv_do_fast_hypercall8(HVCALL_INITIALIZE_PARTITION,
*(u64 *)&input);
if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
if (!hv_result_needs_memory(status)) {
ret = hv_result_to_errno(status);
break;
}
ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id, 1);
ret = hv_deposit_memory(partition_id, status);
} while (!ret);
return ret;
@ -239,7 +238,7 @@ static int hv_do_map_gpa_hcall(u64 partition_id, u64 gfn, u64 page_struct_count,
completed = hv_repcomp(status);
if (hv_result(status) == HV_STATUS_INSUFFICIENT_MEMORY) {
if (hv_result_needs_memory(status)) {
ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id,
HV_MAP_GPA_DEPOSIT_PAGES);
if (ret)
@ -455,7 +454,7 @@ int hv_call_get_vp_state(u32 vp_index, u64 partition_id,
status = hv_do_hypercall(control, input, output);
if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
if (!hv_result_needs_memory(status)) {
if (hv_result_success(status) && ret_output)
memcpy(ret_output, output, sizeof(*output));
@ -465,8 +464,7 @@ int hv_call_get_vp_state(u32 vp_index, u64 partition_id,
}
local_irq_restore(flags);
ret = hv_call_deposit_pages(NUMA_NO_NODE,
partition_id, 1);
ret = hv_deposit_memory(partition_id, status);
} while (!ret);
return ret;
@ -518,15 +516,14 @@ int hv_call_set_vp_state(u32 vp_index, u64 partition_id,
status = hv_do_hypercall(control, input, NULL);
if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
if (!hv_result_needs_memory(status)) {
local_irq_restore(flags);
ret = hv_result_to_errno(status);
break;
}
local_irq_restore(flags);
ret = hv_call_deposit_pages(NUMA_NO_NODE,
partition_id, 1);
ret = hv_deposit_memory(partition_id, status);
} while (!ret);
return ret;
@ -563,7 +560,7 @@ static int hv_call_map_vp_state_page(u64 partition_id, u32 vp_index, u32 type,
status = hv_do_hypercall(HVCALL_MAP_VP_STATE_PAGE, input,
output);
if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
if (!hv_result_needs_memory(status)) {
if (hv_result_success(status))
*state_page = pfn_to_page(output->map_location);
local_irq_restore(flags);
@ -573,7 +570,7 @@ static int hv_call_map_vp_state_page(u64 partition_id, u32 vp_index, u32 type,
local_irq_restore(flags);
ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id, 1);
ret = hv_deposit_memory(partition_id, status);
} while (!ret);
return ret;
@ -718,12 +715,11 @@ hv_call_create_port(u64 port_partition_id, union hv_port_id port_id,
if (hv_result_success(status))
break;
if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
if (!hv_result_needs_memory(status)) {
ret = hv_result_to_errno(status);
break;
}
ret = hv_call_deposit_pages(NUMA_NO_NODE, port_partition_id, 1);
ret = hv_deposit_memory(port_partition_id, status);
} while (!ret);
return ret;
@ -772,12 +768,11 @@ hv_call_connect_port(u64 port_partition_id, union hv_port_id port_id,
if (hv_result_success(status))
break;
if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
if (!hv_result_needs_memory(status)) {
ret = hv_result_to_errno(status);
break;
}
ret = hv_call_deposit_pages(NUMA_NO_NODE,
connection_partition_id, 1);
ret = hv_deposit_memory(connection_partition_id, status);
} while (!ret);
return ret;
@ -813,6 +808,13 @@ hv_call_notify_port_ring_empty(u32 sint_index)
return hv_result_to_errno(status);
}
/*
* Equivalent of hv_call_map_stats_page() for cases when the caller provides
* the map location.
*
* NOTE: This is a newer hypercall that always supports SELF and PARENT stats
* areas, unlike hv_call_map_stats_page().
*/
static int hv_call_map_stats_page2(enum hv_stats_object_type type,
const union hv_stats_object_identity *identity,
u64 map_location)
@ -843,21 +845,49 @@ static int hv_call_map_stats_page2(enum hv_stats_object_type type,
if (!ret)
break;
if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
if (!hv_result_needs_memory(status)) {
hv_status_debug(status, "\n");
break;
}
ret = hv_call_deposit_pages(NUMA_NO_NODE,
hv_current_partition_id, 1);
ret = hv_deposit_memory(hv_current_partition_id, status);
} while (!ret);
return ret;
}
static int hv_call_map_stats_page(enum hv_stats_object_type type,
const union hv_stats_object_identity *identity,
void **addr)
static int
hv_stats_get_area_type(enum hv_stats_object_type type,
const union hv_stats_object_identity *identity)
{
switch (type) {
case HV_STATS_OBJECT_HYPERVISOR:
return identity->hv.stats_area_type;
case HV_STATS_OBJECT_LOGICAL_PROCESSOR:
return identity->lp.stats_area_type;
case HV_STATS_OBJECT_PARTITION:
return identity->partition.stats_area_type;
case HV_STATS_OBJECT_VP:
return identity->vp.stats_area_type;
}
return -EINVAL;
}
/*
* Map a stats page, where the page location is provided by the hypervisor.
*
* NOTE: The concept of separate SELF and PARENT stats areas does not exist on
* older hypervisor versions. All the available stats information can be found
* on the SELF page. When attempting to map the PARENT area on a hypervisor
* that doesn't support it, return "success" but with a NULL address. The
* caller should check for this case and instead fallback to the SELF area
* alone.
*/
static int
hv_call_map_stats_page(enum hv_stats_object_type type,
const union hv_stats_object_identity *identity,
struct hv_stats_page **addr)
{
unsigned long flags;
struct hv_input_map_stats_page *input;
@ -878,15 +908,22 @@ static int hv_call_map_stats_page(enum hv_stats_object_type type,
pfn = output->map_location;
local_irq_restore(flags);
if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) {
ret = hv_result_to_errno(status);
if (!hv_result_needs_memory(status)) {
if (hv_result_success(status))
break;
return ret;
if (hv_stats_get_area_type(type, identity) == HV_STATS_AREA_PARENT &&
hv_result(status) == HV_STATUS_INVALID_PARAMETER) {
*addr = NULL;
return 0;
}
hv_status_debug(status, "\n");
return hv_result_to_errno(status);
}
ret = hv_call_deposit_pages(NUMA_NO_NODE,
hv_current_partition_id, 1);
ret = hv_deposit_memory(hv_current_partition_id, status);
if (ret)
return ret;
} while (!ret);
@ -898,7 +935,7 @@ static int hv_call_map_stats_page(enum hv_stats_object_type type,
int hv_map_stats_page(enum hv_stats_object_type type,
const union hv_stats_object_identity *identity,
void **addr)
struct hv_stats_page **addr)
{
int ret;
struct page *allocated_page = NULL;
@ -946,7 +983,8 @@ static int hv_call_unmap_stats_page(enum hv_stats_object_type type,
return hv_result_to_errno(status);
}
int hv_unmap_stats_page(enum hv_stats_object_type type, void *page_addr,
int hv_unmap_stats_page(enum hv_stats_object_type type,
struct hv_stats_page *page_addr,
const union hv_stats_object_identity *identity)
{
int ret;

View file

@ -39,22 +39,12 @@ MODULE_AUTHOR("Microsoft");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Microsoft Hyper-V root partition VMM interface /dev/mshv");
/* TODO move this to another file when debugfs code is added */
enum hv_stats_vp_counters { /* HV_THREAD_COUNTER */
#if defined(CONFIG_X86)
VpRootDispatchThreadBlocked = 202,
/* HV_THREAD_COUNTER */
#if defined(CONFIG_X86_64)
#define HV_VP_COUNTER_ROOT_DISPATCH_THREAD_BLOCKED 202
#elif defined(CONFIG_ARM64)
VpRootDispatchThreadBlocked = 94,
#define HV_VP_COUNTER_ROOT_DISPATCH_THREAD_BLOCKED 95
#endif
VpStatsMaxCounter
};
struct hv_stats_page {
union {
u64 vp_cntrs[VpStatsMaxCounter]; /* VP counters */
u8 data[HV_HYP_PAGE_SIZE];
};
} __packed;
struct mshv_root mshv_root;
@ -130,6 +120,7 @@ static u16 mshv_passthru_hvcalls[] = {
HVCALL_SET_VP_REGISTERS,
HVCALL_TRANSLATE_VIRTUAL_ADDRESS,
HVCALL_CLEAR_VIRTUAL_INTERRUPT,
HVCALL_SCRUB_PARTITION,
HVCALL_REGISTER_INTERCEPT_RESULT,
HVCALL_ASSERT_VIRTUAL_INTERRUPT,
HVCALL_GET_GPA_PAGES_ACCESS_STATES,
@ -261,11 +252,10 @@ static int mshv_ioctl_passthru_hvcall(struct mshv_partition *partition,
if (hv_result_success(status))
break;
if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY)
if (!hv_result_needs_memory(status))
ret = hv_result_to_errno(status);
else
ret = hv_call_deposit_pages(NUMA_NO_NODE,
pt_id, 1);
ret = hv_deposit_memory(pt_id, status);
} while (!ret);
args.status = hv_result(status);
@ -485,12 +475,11 @@ static u64 mshv_vp_interrupt_pending(struct mshv_vp *vp)
static bool mshv_vp_dispatch_thread_blocked(struct mshv_vp *vp)
{
struct hv_stats_page **stats = vp->vp_stats_pages;
u64 *self_vp_cntrs = stats[HV_STATS_AREA_SELF]->vp_cntrs;
u64 *parent_vp_cntrs = stats[HV_STATS_AREA_PARENT]->vp_cntrs;
u64 *self_vp_cntrs = stats[HV_STATS_AREA_SELF]->data;
u64 *parent_vp_cntrs = stats[HV_STATS_AREA_PARENT]->data;
if (self_vp_cntrs[VpRootDispatchThreadBlocked])
return self_vp_cntrs[VpRootDispatchThreadBlocked];
return parent_vp_cntrs[VpRootDispatchThreadBlocked];
return parent_vp_cntrs[HV_VP_COUNTER_ROOT_DISPATCH_THREAD_BLOCKED] ||
self_vp_cntrs[HV_VP_COUNTER_ROOT_DISPATCH_THREAD_BLOCKED];
}
static int
@ -661,7 +650,7 @@ static bool mshv_handle_gpa_intercept(struct mshv_vp *vp)
return false;
/* Only movable memory ranges are supported for GPA intercepts */
if (region->type == MSHV_REGION_TYPE_MEM_MOVABLE)
if (region->mreg_type == MSHV_REGION_TYPE_MEM_MOVABLE)
ret = mshv_region_handle_gfn_fault(region, gfn);
else
ret = false;
@ -957,23 +946,36 @@ mshv_vp_release(struct inode *inode, struct file *filp)
return 0;
}
static void mshv_vp_stats_unmap(u64 partition_id, u32 vp_index,
void *stats_pages[])
void mshv_vp_stats_unmap(u64 partition_id, u32 vp_index,
struct hv_stats_page *stats_pages[])
{
union hv_stats_object_identity identity = {
.vp.partition_id = partition_id,
.vp.vp_index = vp_index,
};
int err;
identity.vp.stats_area_type = HV_STATS_AREA_SELF;
hv_unmap_stats_page(HV_STATS_OBJECT_VP, NULL, &identity);
err = hv_unmap_stats_page(HV_STATS_OBJECT_VP,
stats_pages[HV_STATS_AREA_SELF],
&identity);
if (err)
pr_err("%s: failed to unmap partition %llu vp %u self stats, err: %d\n",
__func__, partition_id, vp_index, err);
identity.vp.stats_area_type = HV_STATS_AREA_PARENT;
hv_unmap_stats_page(HV_STATS_OBJECT_VP, NULL, &identity);
if (stats_pages[HV_STATS_AREA_PARENT] != stats_pages[HV_STATS_AREA_SELF]) {
identity.vp.stats_area_type = HV_STATS_AREA_PARENT;
err = hv_unmap_stats_page(HV_STATS_OBJECT_VP,
stats_pages[HV_STATS_AREA_PARENT],
&identity);
if (err)
pr_err("%s: failed to unmap partition %llu vp %u parent stats, err: %d\n",
__func__, partition_id, vp_index, err);
}
}
static int mshv_vp_stats_map(u64 partition_id, u32 vp_index,
void *stats_pages[])
int mshv_vp_stats_map(u64 partition_id, u32 vp_index,
struct hv_stats_page *stats_pages[])
{
union hv_stats_object_identity identity = {
.vp.partition_id = partition_id,
@ -984,20 +986,37 @@ static int mshv_vp_stats_map(u64 partition_id, u32 vp_index,
identity.vp.stats_area_type = HV_STATS_AREA_SELF;
err = hv_map_stats_page(HV_STATS_OBJECT_VP, &identity,
&stats_pages[HV_STATS_AREA_SELF]);
if (err)
if (err) {
pr_err("%s: failed to map partition %llu vp %u self stats, err: %d\n",
__func__, partition_id, vp_index, err);
return err;
}
identity.vp.stats_area_type = HV_STATS_AREA_PARENT;
err = hv_map_stats_page(HV_STATS_OBJECT_VP, &identity,
&stats_pages[HV_STATS_AREA_PARENT]);
if (err)
goto unmap_self;
/*
* L1VH partition cannot access its vp stats in parent area.
*/
if (is_l1vh_parent(partition_id)) {
stats_pages[HV_STATS_AREA_PARENT] = stats_pages[HV_STATS_AREA_SELF];
} else {
identity.vp.stats_area_type = HV_STATS_AREA_PARENT;
err = hv_map_stats_page(HV_STATS_OBJECT_VP, &identity,
&stats_pages[HV_STATS_AREA_PARENT]);
if (err) {
pr_err("%s: failed to map partition %llu vp %u parent stats, err: %d\n",
__func__, partition_id, vp_index, err);
goto unmap_self;
}
if (!stats_pages[HV_STATS_AREA_PARENT])
stats_pages[HV_STATS_AREA_PARENT] = stats_pages[HV_STATS_AREA_SELF];
}
return 0;
unmap_self:
identity.vp.stats_area_type = HV_STATS_AREA_SELF;
hv_unmap_stats_page(HV_STATS_OBJECT_VP, NULL, &identity);
hv_unmap_stats_page(HV_STATS_OBJECT_VP,
stats_pages[HV_STATS_AREA_SELF],
&identity);
return err;
}
@ -1008,7 +1027,7 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
struct mshv_create_vp args;
struct mshv_vp *vp;
struct page *intercept_msg_page, *register_page, *ghcb_page;
void *stats_pages[2];
struct hv_stats_page *stats_pages[2];
long ret;
if (copy_from_user(&args, arg, sizeof(args)))
@ -1048,16 +1067,10 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
goto unmap_register_page;
}
/*
* This mapping of the stats page is for detecting if dispatch thread
* is blocked - only relevant for root scheduler
*/
if (hv_scheduler_type == HV_SCHEDULER_TYPE_ROOT) {
ret = mshv_vp_stats_map(partition->pt_id, args.vp_index,
stats_pages);
if (ret)
goto unmap_ghcb_page;
}
ret = mshv_vp_stats_map(partition->pt_id, args.vp_index,
stats_pages);
if (ret)
goto unmap_ghcb_page;
vp = kzalloc(sizeof(*vp), GFP_KERNEL);
if (!vp)
@ -1081,8 +1094,11 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
if (mshv_partition_encrypted(partition) && is_ghcb_mapping_available())
vp->vp_ghcb_page = page_to_virt(ghcb_page);
if (hv_scheduler_type == HV_SCHEDULER_TYPE_ROOT)
memcpy(vp->vp_stats_pages, stats_pages, sizeof(stats_pages));
memcpy(vp->vp_stats_pages, stats_pages, sizeof(stats_pages));
ret = mshv_debugfs_vp_create(vp);
if (ret)
goto put_partition;
/*
* Keep anon_inode_getfd last: it installs fd in the file struct and
@ -1091,7 +1107,7 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
ret = anon_inode_getfd("mshv_vp", &mshv_vp_fops, vp,
O_RDWR | O_CLOEXEC);
if (ret < 0)
goto put_partition;
goto remove_debugfs_vp;
/* already exclusive with the partition mutex for all ioctls */
partition->pt_vp_count++;
@ -1099,13 +1115,14 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
return ret;
remove_debugfs_vp:
mshv_debugfs_vp_remove(vp);
put_partition:
mshv_partition_put(partition);
free_vp:
kfree(vp);
unmap_stats_pages:
if (hv_scheduler_type == HV_SCHEDULER_TYPE_ROOT)
mshv_vp_stats_unmap(partition->pt_id, args.vp_index, stats_pages);
mshv_vp_stats_unmap(partition->pt_id, args.vp_index, stats_pages);
unmap_ghcb_page:
if (mshv_partition_encrypted(partition) && is_ghcb_mapping_available())
hv_unmap_vp_state_page(partition->pt_id, args.vp_index,
@ -1176,12 +1193,12 @@ static int mshv_partition_create_region(struct mshv_partition *partition,
return PTR_ERR(rg);
if (is_mmio)
rg->type = MSHV_REGION_TYPE_MMIO;
rg->mreg_type = MSHV_REGION_TYPE_MMIO;
else if (mshv_partition_encrypted(partition) ||
!mshv_region_movable_init(rg))
rg->type = MSHV_REGION_TYPE_MEM_PINNED;
rg->mreg_type = MSHV_REGION_TYPE_MEM_PINNED;
else
rg->type = MSHV_REGION_TYPE_MEM_MOVABLE;
rg->mreg_type = MSHV_REGION_TYPE_MEM_MOVABLE;
rg->partition = partition;
@ -1298,7 +1315,7 @@ mshv_map_user_memory(struct mshv_partition *partition,
if (ret)
return ret;
switch (region->type) {
switch (region->mreg_type) {
case MSHV_REGION_TYPE_MEM_PINNED:
ret = mshv_prepare_pinned_region(region);
break;
@ -1542,10 +1559,16 @@ mshv_partition_ioctl_initialize(struct mshv_partition *partition)
if (ret)
goto withdraw_mem;
ret = mshv_debugfs_partition_create(partition);
if (ret)
goto finalize_partition;
partition->pt_initialized = true;
return 0;
finalize_partition:
hv_call_finalize_partition(partition->pt_id);
withdraw_mem:
hv_call_withdraw_memory(U64_MAX, NUMA_NO_NODE, partition->pt_id);
@ -1725,9 +1748,9 @@ static void destroy_partition(struct mshv_partition *partition)
if (!vp)
continue;
if (hv_scheduler_type == HV_SCHEDULER_TYPE_ROOT)
mshv_vp_stats_unmap(partition->pt_id, vp->vp_index,
(void **)vp->vp_stats_pages);
mshv_debugfs_vp_remove(vp);
mshv_vp_stats_unmap(partition->pt_id, vp->vp_index,
vp->vp_stats_pages);
if (vp->vp_register_page) {
(void)hv_unmap_vp_state_page(partition->pt_id,
@ -1759,6 +1782,8 @@ static void destroy_partition(struct mshv_partition *partition)
partition->pt_vp_array[i] = NULL;
}
mshv_debugfs_partition_remove(partition);
/* Deallocates and unmaps everything including vcpus, GPA mappings etc */
hv_call_finalize_partition(partition->pt_id);
@ -1921,6 +1946,10 @@ static long mshv_ioctl_process_pt_flags(void __user *user_arg, u64 *pt_flags,
*pt_flags |= HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE;
if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_GPA_SUPER_PAGES))
*pt_flags |= HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED;
if (args.pt_flags & BIT(MSHV_PT_BIT_NESTED_VIRTUALIZATION))
*pt_flags |= HV_PARTITION_CREATION_FLAG_NESTED_VIRTUALIZATION_CAPABLE;
if (args.pt_flags & BIT(MSHV_PT_BIT_SMT_ENABLED_GUEST))
*pt_flags |= HV_PARTITION_CREATION_FLAG_SMT_ENABLED_GUEST;
isol_props->as_uint64 = 0;
@ -2054,6 +2083,29 @@ static const char *scheduler_type_to_string(enum hv_scheduler_type type)
};
}
static int __init l1vh_retrieve_scheduler_type(enum hv_scheduler_type *out)
{
u64 integrated_sched_enabled;
int ret;
*out = HV_SCHEDULER_TYPE_CORE_SMT;
if (!mshv_root.vmm_caps.vmm_enable_integrated_scheduler)
return 0;
ret = hv_call_get_partition_property_ex(HV_PARTITION_ID_SELF,
HV_PARTITION_PROPERTY_INTEGRATED_SCHEDULER_ENABLED,
0, &integrated_sched_enabled,
sizeof(integrated_sched_enabled));
if (ret)
return ret;
if (integrated_sched_enabled)
*out = HV_SCHEDULER_TYPE_ROOT;
return 0;
}
/* TODO move this to hv_common.c when needed outside */
static int __init hv_retrieve_scheduler_type(enum hv_scheduler_type *out)
{
@ -2086,13 +2138,12 @@ static int __init hv_retrieve_scheduler_type(enum hv_scheduler_type *out)
/* Retrieve and stash the supported scheduler type */
static int __init mshv_retrieve_scheduler_type(struct device *dev)
{
int ret = 0;
int ret;
if (hv_l1vh_partition())
hv_scheduler_type = HV_SCHEDULER_TYPE_CORE_SMT;
ret = l1vh_retrieve_scheduler_type(&hv_scheduler_type);
else
ret = hv_retrieve_scheduler_type(&hv_scheduler_type);
if (ret)
return ret;
@ -2212,42 +2263,29 @@ struct notifier_block mshv_reboot_nb = {
static void mshv_root_partition_exit(void)
{
unregister_reboot_notifier(&mshv_reboot_nb);
root_scheduler_deinit();
}
static int __init mshv_root_partition_init(struct device *dev)
{
int err;
err = root_scheduler_init(dev);
if (err)
return err;
err = register_reboot_notifier(&mshv_reboot_nb);
if (err)
goto root_sched_deinit;
return 0;
root_sched_deinit:
root_scheduler_deinit();
return err;
return register_reboot_notifier(&mshv_reboot_nb);
}
static void mshv_init_vmm_caps(struct device *dev)
static int __init mshv_init_vmm_caps(struct device *dev)
{
/*
* This can only fail here if HVCALL_GET_PARTITION_PROPERTY_EX or
* HV_PARTITION_PROPERTY_VMM_CAPABILITIES are not supported. In that
* case it's valid to proceed as if all vmm_caps are disabled (zero).
*/
if (hv_call_get_partition_property_ex(HV_PARTITION_ID_SELF,
HV_PARTITION_PROPERTY_VMM_CAPABILITIES,
0, &mshv_root.vmm_caps,
sizeof(mshv_root.vmm_caps)))
dev_warn(dev, "Unable to get VMM capabilities\n");
int ret;
ret = hv_call_get_partition_property_ex(HV_PARTITION_ID_SELF,
HV_PARTITION_PROPERTY_VMM_CAPABILITIES,
0, &mshv_root.vmm_caps,
sizeof(mshv_root.vmm_caps));
if (ret && hv_l1vh_partition()) {
dev_err(dev, "Failed to get VMM capabilities: %d\n", ret);
return ret;
}
dev_dbg(dev, "vmm_caps = %#llx\n", mshv_root.vmm_caps.as_uint64[0]);
return 0;
}
static int __init mshv_parent_partition_init(void)
@ -2293,6 +2331,10 @@ static int __init mshv_parent_partition_init(void)
mshv_cpuhp_online = ret;
ret = mshv_init_vmm_caps(dev);
if (ret)
goto remove_cpu_state;
ret = mshv_retrieve_scheduler_type(dev);
if (ret)
goto remove_cpu_state;
@ -2302,11 +2344,17 @@ static int __init mshv_parent_partition_init(void)
if (ret)
goto remove_cpu_state;
mshv_init_vmm_caps(dev);
ret = root_scheduler_init(dev);
if (ret)
goto exit_partition;
ret = mshv_debugfs_init();
if (ret)
goto deinit_root_scheduler;
ret = mshv_irqfd_wq_init();
if (ret)
goto exit_partition;
goto exit_debugfs;
spin_lock_init(&mshv_root.pt_ht_lock);
hash_init(mshv_root.pt_htable);
@ -2315,6 +2363,10 @@ static int __init mshv_parent_partition_init(void)
return 0;
exit_debugfs:
mshv_debugfs_exit();
deinit_root_scheduler:
root_scheduler_deinit();
exit_partition:
if (hv_root_partition())
mshv_root_partition_exit();
@ -2331,8 +2383,10 @@ static void __exit mshv_parent_partition_exit(void)
{
hv_setup_mshv_handler(NULL);
mshv_port_table_fini();
mshv_debugfs_exit();
misc_deregister(&mshv_dev);
mshv_irqfd_wq_cleanup();
root_scheduler_deinit();
if (hv_root_partition())
mshv_root_partition_exit();
cpuhp_remove_state(mshv_cpuhp_online);

View file

@ -845,9 +845,10 @@ static const struct file_operations mshv_vtl_fops = {
.mmap = mshv_vtl_mmap,
};
static void mshv_vtl_synic_mask_vmbus_sint(const u8 *mask)
static void mshv_vtl_synic_mask_vmbus_sint(void *info)
{
union hv_synic_sint sint;
const u8 *mask = info;
sint.as_uint64 = 0;
sint.vector = HYPERVISOR_CALLBACK_VECTOR;
@ -999,7 +1000,7 @@ static int mshv_vtl_sint_ioctl_pause_msg_stream(struct mshv_sint_mask __user *ar
if (copy_from_user(&mask, arg, sizeof(mask)))
return -EFAULT;
guard(mutex)(&vtl2_vmbus_sint_mask_mutex);
on_each_cpu((smp_call_func_t)mshv_vtl_synic_mask_vmbus_sint, &mask.mask, 1);
on_each_cpu(mshv_vtl_synic_mask_vmbus_sint, &mask.mask, 1);
WRITE_ONCE(vtl_synic_mask_vmbus_sint_masked, mask.mask != 0);
if (mask.mask)
wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN);

View file

@ -25,6 +25,7 @@
#include <linux/cpu.h>
#include <linux/sched/isolation.h>
#include <linux/sched/task_stack.h>
#include <linux/smpboot.h>
#include <linux/delay.h>
#include <linux/panic_notifier.h>
@ -51,7 +52,7 @@ static struct device *vmbus_root_device;
static int hyperv_cpuhp_online;
static long __percpu *vmbus_evt;
static DEFINE_PER_CPU(long, vmbus_evt);
/* Values parsed from ACPI DSDT */
int vmbus_irq;
@ -1350,7 +1351,7 @@ static void vmbus_message_sched(struct hv_per_cpu_context *hv_cpu, void *message
}
}
void vmbus_isr(void)
static void __vmbus_isr(void)
{
struct hv_per_cpu_context *hv_cpu
= this_cpu_ptr(hv_context.cpu_context);
@ -1363,6 +1364,53 @@ void vmbus_isr(void)
add_interrupt_randomness(vmbus_interrupt);
}
static DEFINE_PER_CPU(bool, vmbus_irq_pending);
static DEFINE_PER_CPU(struct task_struct *, vmbus_irqd);
static void vmbus_irqd_wake(void)
{
struct task_struct *tsk = __this_cpu_read(vmbus_irqd);
__this_cpu_write(vmbus_irq_pending, true);
wake_up_process(tsk);
}
static void vmbus_irqd_setup(unsigned int cpu)
{
sched_set_fifo(current);
}
static int vmbus_irqd_should_run(unsigned int cpu)
{
return __this_cpu_read(vmbus_irq_pending);
}
static void run_vmbus_irqd(unsigned int cpu)
{
__this_cpu_write(vmbus_irq_pending, false);
__vmbus_isr();
}
static bool vmbus_irq_initialized;
static struct smp_hotplug_thread vmbus_irq_threads = {
.store = &vmbus_irqd,
.setup = vmbus_irqd_setup,
.thread_should_run = vmbus_irqd_should_run,
.thread_fn = run_vmbus_irqd,
.thread_comm = "vmbus_irq/%u",
};
void vmbus_isr(void)
{
if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
vmbus_irqd_wake();
} else {
lockdep_hardirq_threaded();
__vmbus_isr();
}
}
EXPORT_SYMBOL_FOR_MODULES(vmbus_isr, "mshv_vtl");
static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
@ -1462,16 +1510,21 @@ static int vmbus_bus_init(void)
* the VMbus interrupt handler.
*/
if (IS_ENABLED(CONFIG_PREEMPT_RT) && !vmbus_irq_initialized) {
ret = smpboot_register_percpu_thread(&vmbus_irq_threads);
if (ret)
goto err_kthread;
vmbus_irq_initialized = true;
}
if (vmbus_irq == -1) {
hv_setup_vmbus_handler(vmbus_isr);
} else {
vmbus_evt = alloc_percpu(long);
ret = request_percpu_irq(vmbus_irq, vmbus_percpu_isr,
"Hyper-V VMbus", vmbus_evt);
"Hyper-V VMbus", &vmbus_evt);
if (ret) {
pr_err("Can't request Hyper-V VMbus IRQ %d, Err %d",
vmbus_irq, ret);
free_percpu(vmbus_evt);
goto err_setup;
}
}
@ -1500,13 +1553,16 @@ static int vmbus_bus_init(void)
return 0;
err_connect:
if (vmbus_irq == -1) {
if (vmbus_irq == -1)
hv_remove_vmbus_handler();
} else {
free_percpu_irq(vmbus_irq, vmbus_evt);
free_percpu(vmbus_evt);
}
else
free_percpu_irq(vmbus_irq, &vmbus_evt);
err_setup:
if (IS_ENABLED(CONFIG_PREEMPT_RT) && vmbus_irq_initialized) {
smpboot_unregister_percpu_thread(&vmbus_irq_threads);
vmbus_irq_initialized = false;
}
err_kthread:
bus_unregister(&hv_bus);
return ret;
}
@ -2970,11 +3026,13 @@ static void __exit vmbus_exit(void)
vmbus_connection.conn_state = DISCONNECTED;
hv_stimer_global_cleanup();
vmbus_disconnect();
if (vmbus_irq == -1) {
if (vmbus_irq == -1)
hv_remove_vmbus_handler();
} else {
free_percpu_irq(vmbus_irq, vmbus_evt);
free_percpu(vmbus_evt);
else
free_percpu_irq(vmbus_irq, &vmbus_evt);
if (IS_ENABLED(CONFIG_PREEMPT_RT) && vmbus_irq_initialized) {
smpboot_unregister_percpu_thread(&vmbus_irq_threads);
vmbus_irq_initialized = false;
}
for_each_online_cpu(cpu) {
struct hv_per_cpu_context *hv_cpu

View file

@ -52,17 +52,5 @@ int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
}
EXPORT_SYMBOL_GPL(hyperv_reg_block_invalidate);
static void __exit exit_hv_pci_intf(void)
{
}
static int __init init_hv_pci_intf(void)
{
return 0;
}
module_init(init_hv_pci_intf);
module_exit(exit_hv_pci_intf);
MODULE_DESCRIPTION("Hyper-V PCI Interface");
MODULE_LICENSE("GPL v2");

View file

@ -501,7 +501,6 @@ struct hv_pcibus_device {
struct resource *low_mmio_res;
struct resource *high_mmio_res;
struct completion *survey_event;
struct pci_bus *pci_bus;
spinlock_t config_lock; /* Avoid two threads writing index page */
spinlock_t device_list_lock; /* Protect lists below */
void __iomem *cfg_addr;

View file

@ -342,6 +342,9 @@ static inline bool hv_parent_partition(void)
{
return hv_root_partition() || hv_l1vh_partition();
}
bool hv_result_needs_memory(u64 status);
int hv_deposit_memory_node(int node, u64 partition_id, u64 status);
int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
@ -350,6 +353,11 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
static inline bool hv_root_partition(void) { return false; }
static inline bool hv_l1vh_partition(void) { return false; }
static inline bool hv_parent_partition(void) { return false; }
static inline bool hv_result_needs_memory(u64 status) { return false; }
static inline int hv_deposit_memory_node(int node, u64 partition_id, u64 status)
{
return -EOPNOTSUPP;
}
static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
{
return -EOPNOTSUPP;
@ -364,6 +372,11 @@ static inline int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u3
}
#endif /* CONFIG_MSHV_ROOT */
static inline int hv_deposit_memory(u64 partition_id, u64 status)
{
return hv_deposit_memory_node(NUMA_NO_NODE, partition_id, status);
}
#if IS_ENABLED(CONFIG_HYPERV_VTL_MODE)
u8 __init get_vtl(void);
#else

View file

@ -14,33 +14,36 @@ struct hv_u128 {
} __packed;
/* NOTE: when adding below, update hv_result_to_string() */
#define HV_STATUS_SUCCESS 0x0
#define HV_STATUS_INVALID_HYPERCALL_CODE 0x2
#define HV_STATUS_INVALID_HYPERCALL_INPUT 0x3
#define HV_STATUS_INVALID_ALIGNMENT 0x4
#define HV_STATUS_INVALID_PARAMETER 0x5
#define HV_STATUS_ACCESS_DENIED 0x6
#define HV_STATUS_INVALID_PARTITION_STATE 0x7
#define HV_STATUS_OPERATION_DENIED 0x8
#define HV_STATUS_UNKNOWN_PROPERTY 0x9
#define HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE 0xA
#define HV_STATUS_INSUFFICIENT_MEMORY 0xB
#define HV_STATUS_INVALID_PARTITION_ID 0xD
#define HV_STATUS_INVALID_VP_INDEX 0xE
#define HV_STATUS_NOT_FOUND 0x10
#define HV_STATUS_INVALID_PORT_ID 0x11
#define HV_STATUS_INVALID_CONNECTION_ID 0x12
#define HV_STATUS_INSUFFICIENT_BUFFERS 0x13
#define HV_STATUS_NOT_ACKNOWLEDGED 0x14
#define HV_STATUS_INVALID_VP_STATE 0x15
#define HV_STATUS_NO_RESOURCES 0x1D
#define HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED 0x20
#define HV_STATUS_INVALID_LP_INDEX 0x41
#define HV_STATUS_INVALID_REGISTER_VALUE 0x50
#define HV_STATUS_OPERATION_FAILED 0x71
#define HV_STATUS_TIME_OUT 0x78
#define HV_STATUS_CALL_PENDING 0x79
#define HV_STATUS_VTL_ALREADY_ENABLED 0x86
#define HV_STATUS_SUCCESS 0x0
#define HV_STATUS_INVALID_HYPERCALL_CODE 0x2
#define HV_STATUS_INVALID_HYPERCALL_INPUT 0x3
#define HV_STATUS_INVALID_ALIGNMENT 0x4
#define HV_STATUS_INVALID_PARAMETER 0x5
#define HV_STATUS_ACCESS_DENIED 0x6
#define HV_STATUS_INVALID_PARTITION_STATE 0x7
#define HV_STATUS_OPERATION_DENIED 0x8
#define HV_STATUS_UNKNOWN_PROPERTY 0x9
#define HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE 0xA
#define HV_STATUS_INSUFFICIENT_MEMORY 0xB
#define HV_STATUS_INVALID_PARTITION_ID 0xD
#define HV_STATUS_INVALID_VP_INDEX 0xE
#define HV_STATUS_NOT_FOUND 0x10
#define HV_STATUS_INVALID_PORT_ID 0x11
#define HV_STATUS_INVALID_CONNECTION_ID 0x12
#define HV_STATUS_INSUFFICIENT_BUFFERS 0x13
#define HV_STATUS_NOT_ACKNOWLEDGED 0x14
#define HV_STATUS_INVALID_VP_STATE 0x15
#define HV_STATUS_NO_RESOURCES 0x1D
#define HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED 0x20
#define HV_STATUS_INVALID_LP_INDEX 0x41
#define HV_STATUS_INVALID_REGISTER_VALUE 0x50
#define HV_STATUS_OPERATION_FAILED 0x71
#define HV_STATUS_INSUFFICIENT_ROOT_MEMORY 0x73
#define HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY 0x75
#define HV_STATUS_TIME_OUT 0x78
#define HV_STATUS_CALL_PENDING 0x79
#define HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY 0x83
#define HV_STATUS_VTL_ALREADY_ENABLED 0x86
/*
* The Hyper-V TimeRefCount register and the TSC
@ -474,6 +477,7 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */
#define HVCALL_NOTIFY_PARTITION_EVENT 0x0087
#define HVCALL_ENTER_SLEEP_STATE 0x0084
#define HVCALL_NOTIFY_PORT_RING_EMPTY 0x008b
#define HVCALL_SCRUB_PARTITION 0x008d
#define HVCALL_REGISTER_INTERCEPT_RESULT 0x0091
#define HVCALL_ASSERT_VIRTUAL_INTERRUPT 0x0094
#define HVCALL_CREATE_PORT 0x0095

View file

@ -10,6 +10,13 @@
#include "hvhdk_mini.h"
#include "hvgdk.h"
/*
* Hypervisor statistics page format
*/
struct hv_stats_page {
u64 data[HV_HYP_PAGE_SIZE / sizeof(u64)];
} __packed;
/* Bits for dirty mask of hv_vp_register_page */
#define HV_X64_REGISTER_CLASS_GENERAL 0
#define HV_X64_REGISTER_CLASS_IP 1
@ -328,6 +335,8 @@ union hv_partition_isolation_properties {
#define HV_PARTITION_ISOLATION_HOST_TYPE_RESERVED 0x2
/* Note: Exo partition is enabled by default */
#define HV_PARTITION_CREATION_FLAG_SMT_ENABLED_GUEST BIT(0)
#define HV_PARTITION_CREATION_FLAG_NESTED_VIRTUALIZATION_CAPABLE BIT(1)
#define HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED BIT(4)
#define HV_PARTITION_CREATION_FLAG_EXO_PARTITION BIT(8)
#define HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED BIT(13)

View file

@ -7,6 +7,8 @@
#include "hvgdk_mini.h"
#define HV_MAX_CONTIGUOUS_ALLOCATION_PAGES 8
/*
* Doorbell connection_info flags.
*/
@ -87,6 +89,9 @@ enum hv_partition_property_code {
HV_PARTITION_PROPERTY_PRIVILEGE_FLAGS = 0x00010000,
HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES = 0x00010001,
/* Integrated scheduling properties */
HV_PARTITION_PROPERTY_INTEGRATED_SCHEDULER_ENABLED = 0x00020005,
/* Resource properties */
HV_PARTITION_PROPERTY_GPA_PAGE_ACCESS_TRACKING = 0x00050005,
HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION = 0x00050017,
@ -102,7 +107,7 @@ enum hv_partition_property_code {
};
#define HV_PARTITION_VMM_CAPABILITIES_BANK_COUNT 1
#define HV_PARTITION_VMM_CAPABILITIES_RESERVED_BITFIELD_COUNT 59
#define HV_PARTITION_VMM_CAPABILITIES_RESERVED_BITFIELD_COUNT 57
struct hv_partition_property_vmm_capabilities {
u16 bank_count;
@ -119,6 +124,8 @@ struct hv_partition_property_vmm_capabilities {
u64 reservedbit3: 1;
#endif
u64 assignable_synthetic_proc_features: 1;
u64 reservedbit5: 1;
u64 vmm_enable_integrated_scheduler : 1;
u64 reserved0: HV_PARTITION_VMM_CAPABILITIES_RESERVED_BITFIELD_COUNT;
} __packed;
};

View file

@ -27,6 +27,8 @@ enum {
MSHV_PT_BIT_X2APIC,
MSHV_PT_BIT_GPA_SUPER_PAGES,
MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES,
MSHV_PT_BIT_NESTED_VIRTUALIZATION,
MSHV_PT_BIT_SMT_ENABLED_GUEST,
MSHV_PT_BIT_COUNT,
};