VFIO updates for v7.0-rc1

  - Update outdated mdev comment referencing the renamed
    mdev_type_add() function. (Julia Lawall)
 
  - Introduce selftest support for IOMMU mapping of PCI MMIO BARs.
    (Alex Mastro)
 
  - Relax selftest assertion relative to differences in huge page
    handling between legacy (v1) TYPE1 IOMMU mapping behavior and
    the compatibility mode supported by IOMMUFD. (David Matlack)
 
  - Reintroduce memory poison handling support for non-struct-page-
    backed memory in the nvgrace-gpu variant driver. (Ankit Agrawal)
 
  - Replace dma_buf_phys_vec with phys_vec to avoid duplicate
    structure and semantics. (Leon Romanovsky)
 
  - Add missing upstream bridge locking across PCI function reset,
    resolving an assertion failure when secondary bus reset is used
    to provide that reset. (Anthony Pighin)
 
  - Fixes to hisi_acc vfio-pci variant driver to resolve corner case
    issues related to resets, repeated migration, and error injection
    scenarios. (Longfang Liu, Weili Qian)
 
  - Restrict vfio selftest builds to arm64 and x86_64, resolving
    compiler warnings on 32-bit archs. (Ted Logan)
 
  - Un-deprecate the fsl-mc vfio bus driver as a new maintainer has
    stepped up. (Ioana Ciornei)
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCgAvFiEEQvbATlQL0amee4qQI5ubbjuwiyIFAmmNCcMRHGFsZXhAc2hh
 emJvdC5vcmcACgkQI5ubbjuwiyLlvw/9FLOcpjKCcxyWFPGUMHV9L0N8dWMR5t75
 Pu6cBuYdpqGgrUaa1NWHYEzFbMSkEJMb5jLj26lokn2l4VZ9BKwdehaE/7t978z2
 J0FgnGUg3B4lYm5qoBStaJ26123XafTMnsBn+wKdXt/lN6ng6GXVBxnmGP+Fuuwd
 HA3MSFB6HUFw4et8qDG3ziyboN/pSWyXaupy60zvVy9x39i4/ZzMm3PSrYPdUX4x
 aPM+lWKRi5yFMwiksZyYb67XA717Js8xhmgNMeJ8Yz3ZUF0n3Z7ZpOzbU+hl8LNn
 sAea6+lXXsvNjEXfet1mjg7A+RYmuQdcjk58J//ijRXn7zRijRM671Bzc40T2JcP
 bfrajHhprMsE+u7VwiBuERACTtbemuaKSbi5iNLHAIqTFwPpb400PvbptkyQhkxh
 IRXIxqgKb5G6/sd73m9dKR9HU7d5SL3mNCARrymgqT6kRxz8fqtaVsXbbsa1Tgah
 iV8in7wjKJ/80rYQd7gNyj/RRpYTAJJemfnJtKGQ9LxGnej8AV6kUZ3np7hpspz7
 TVtmn9RxlwbA5lWYXJ4VUzt9u2Riwd2W6jg6ZnUknSZN6B5j2Jd2bDtF/FKLauKG
 DW/bN8UU7nzgC40ro92qJEFF2PC7GkfZUVRlgW0oq54QZjyCoAIpfYOXjLTSteYP
 umnjcrWkgag=
 =F+FV
 -----END PGP SIGNATURE-----

Merge tag 'vfio-v7.0-rc1' of https://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:
 "A small cycle with the bulk in selftests and reintroducing poison
  handling in the nvgrace-gpu driver. The rest are fixes, cleanups, and
  some dmabuf structure consolidation.

   - Update outdated mdev comment referencing the renamed
     mdev_type_add() function (Julia Lawall)

   - Introduce selftest support for IOMMU mapping of PCI MMIO BARs (Alex
     Mastro)

   - Relax selftest assertion relative to differences in huge page
     handling between legacy (v1) TYPE1 IOMMU mapping behavior and the
     compatibility mode supported by IOMMUFD (David Matlack)

   - Reintroduce memory poison handling support for non-struct-page-
     backed memory in the nvgrace-gpu variant driver (Ankit Agrawal)

   - Replace dma_buf_phys_vec with phys_vec to avoid duplicate structure
     and semantics (Leon Romanovsky)

   - Add missing upstream bridge locking across PCI function reset,
     resolving an assertion failure when secondary bus reset is used to
     provide that reset (Anthony Pighin)

   - Fixes to hisi_acc vfio-pci variant driver to resolve corner case
     issues related to resets, repeated migration, and error injection
     scenarios (Longfang Liu, Weili Qian)

   - Restrict vfio selftest builds to arm64 and x86_64, resolving
     compiler warnings on 32-bit archs (Ted Logan)

   - Un-deprecate the fsl-mc vfio bus driver as a new maintainer has
     stepped up (Ioana Ciornei)"

* tag 'vfio-v7.0-rc1' of https://github.com/awilliam/linux-vfio:
  vfio/fsl-mc: add myself as maintainer
  vfio: selftests: only build tests on arm64 and x86_64
  hisi_acc_vfio_pci: fix the queue parameter anomaly issue
  hisi_acc_vfio_pci: resolve duplicate migration states
  hisi_acc_vfio_pci: update status after RAS error
  hisi_acc_vfio_pci: fix VF reset timeout issue
  vfio/pci: Lock upstream bridge for vfio_pci_core_disable()
  types: reuse common phys_vec type instead of DMABUF open-coded variant
  vfio/nvgrace-gpu: register device memory for poison handling
  mm: add stubs for PFNMAP memory failure registration functions
  vfio: selftests: Drop IOMMU mapping size assertions for VFIO_TYPE1_IOMMU
  vfio: selftests: Add vfio_dma_mapping_mmio_test
  vfio: selftests: Align BAR mmaps for efficient IOMMU mapping
  vfio: selftests: Centralize IOMMU mode name definitions
  vfio/mdev: update outdated comment
This commit is contained in:
Linus Torvalds 2026-02-12 15:52:39 -08:00
commit cebcffe666
26 changed files with 411 additions and 67 deletions

View file

@ -27606,8 +27606,9 @@ F: include/uapi/linux/vfio.h
F: tools/testing/selftests/vfio/
VFIO FSL-MC DRIVER
M: Ioana Ciornei <ioana.ciornei@nxp.com>
L: kvm@vger.kernel.org
S: Obsolete
S: Maintained
F: drivers/vfio/fsl-mc/
VFIO HISILICON PCI DRIVER

View file

@ -33,8 +33,8 @@ static struct scatterlist *fill_sg_entry(struct scatterlist *sgl, size_t length,
}
static unsigned int calc_sg_nents(struct dma_iova_state *state,
struct dma_buf_phys_vec *phys_vec,
size_t nr_ranges, size_t size)
struct phys_vec *phys_vec, size_t nr_ranges,
size_t size)
{
unsigned int nents = 0;
size_t i;
@ -91,7 +91,7 @@ struct dma_buf_dma {
*/
struct sg_table *dma_buf_phys_vec_to_sgt(struct dma_buf_attachment *attach,
struct p2pdma_provider *provider,
struct dma_buf_phys_vec *phys_vec,
struct phys_vec *phys_vec,
size_t nr_ranges, size_t size,
enum dma_data_direction dir)
{

View file

@ -202,7 +202,7 @@ struct iopt_pages_dmabuf_track {
struct iopt_pages_dmabuf {
struct dma_buf_attachment *attach;
struct dma_buf_phys_vec phys;
struct phys_vec phys;
/* Always PAGE_SIZE aligned */
unsigned long start;
struct list_head tracker;

View file

@ -20,7 +20,6 @@ struct iommu_group;
struct iommu_option;
struct iommufd_device;
struct dma_buf_attachment;
struct dma_buf_phys_vec;
struct iommufd_sw_msi_map {
struct list_head sw_msi_item;
@ -718,7 +717,7 @@ int __init iommufd_test_init(void);
void iommufd_test_exit(void);
bool iommufd_selftest_is_mock_dev(struct device *dev);
int iommufd_test_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
struct dma_buf_phys_vec *phys);
struct phys_vec *phys);
#else
static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
unsigned int ioas_id,
@ -742,7 +741,7 @@ static inline bool iommufd_selftest_is_mock_dev(struct device *dev)
}
static inline int
iommufd_test_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
struct dma_buf_phys_vec *phys)
struct phys_vec *phys)
{
return -EOPNOTSUPP;
}

View file

@ -1078,7 +1078,7 @@ static int pfn_reader_user_update_pinned(struct pfn_reader_user *user,
}
struct pfn_reader_dmabuf {
struct dma_buf_phys_vec phys;
struct phys_vec phys;
unsigned long start_offset;
};
@ -1461,7 +1461,7 @@ static struct dma_buf_attach_ops iopt_dmabuf_attach_revoke_ops = {
*/
static int
sym_vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
struct dma_buf_phys_vec *phys)
struct phys_vec *phys)
{
typeof(&vfio_pci_dma_buf_iommufd_map) fn;
int rc;

View file

@ -2002,7 +2002,7 @@ static const struct dma_buf_ops iommufd_test_dmabuf_ops = {
};
int iommufd_test_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
struct dma_buf_phys_vec *phys)
struct phys_vec *phys)
{
struct iommufd_test_dma_buf *priv = attachment->dmabuf->priv;

View file

@ -2,12 +2,9 @@ menu "VFIO support for FSL_MC bus devices"
depends on FSL_MC_BUS
config VFIO_FSL_MC
tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices (DEPRECATED)"
tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices"
select EVENTFD
help
The vfio-fsl-mc driver is deprecated and will be removed in a
future kernel release.
Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc
(Management Complex) devices. This is required to passthrough
fsl-mc bus devices using the VFIO framework.

View file

@ -531,8 +531,6 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
struct device *dev = &mc_dev->dev;
int ret;
dev_err_once(dev, "DEPRECATION: vfio-fsl-mc is deprecated and will be removed in a future kernel release\n");
vdev = vfio_alloc_device(vfio_fsl_mc_device, vdev, dev,
&vfio_fsl_mc_ops);
if (IS_ERR(vdev))

View file

@ -156,7 +156,7 @@ static void mdev_type_release(struct kobject *kobj)
struct mdev_type *type = to_mdev_type(kobj);
pr_debug("Releasing group %s\n", kobj->name);
/* Pairs with the get in add_mdev_supported_type() */
/* Pairs with the get in mdev_type_add() */
put_device(type->parent->dev);
}

View file

@ -426,7 +426,7 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
ret = qm_get_vft(vf_qm, &vf_qm->qp_base);
if (ret <= 0) {
dev_err(dev, "failed to get vft qp nums\n");
return ret;
return ret < 0 ? ret : -EINVAL;
}
if (ret != vf_data->qp_num) {
@ -1188,12 +1188,34 @@ hisi_acc_vfio_pci_get_device_state(struct vfio_device *vdev,
return 0;
}
static void hisi_acc_vf_pci_reset_prepare(struct pci_dev *pdev)
{
struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_drvdata(pdev);
struct hisi_qm *qm = hisi_acc_vdev->pf_qm;
struct device *dev = &qm->pdev->dev;
u32 delay = 0;
/* All reset requests need to be queued for processing */
while (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) {
msleep(1);
if (++delay > QM_RESET_WAIT_TIMEOUT) {
dev_err(dev, "reset prepare failed\n");
return;
}
}
hisi_acc_vdev->set_reset_flag = true;
}
static void hisi_acc_vf_pci_aer_reset_done(struct pci_dev *pdev)
{
struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_drvdata(pdev);
struct hisi_qm *qm = hisi_acc_vdev->pf_qm;
if (hisi_acc_vdev->core_device.vdev.migration_flags !=
VFIO_MIGRATION_STOP_COPY)
if (hisi_acc_vdev->set_reset_flag)
clear_bit(QM_RESETTING, &qm->misc_ctl);
if (!hisi_acc_vdev->core_device.vdev.mig_ops)
return;
mutex_lock(&hisi_acc_vdev->state_mutex);
@ -1547,6 +1569,7 @@ static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev)
}
hisi_acc_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
hisi_acc_vdev->dev_opened = true;
hisi_acc_vdev->match_done = 0;
mutex_unlock(&hisi_acc_vdev->open_mutex);
}
@ -1734,6 +1757,7 @@ static const struct pci_device_id hisi_acc_vfio_pci_table[] = {
MODULE_DEVICE_TABLE(pci, hisi_acc_vfio_pci_table);
static const struct pci_error_handlers hisi_acc_vf_err_handlers = {
.reset_prepare = hisi_acc_vf_pci_reset_prepare,
.reset_done = hisi_acc_vf_pci_aer_reset_done,
.error_detected = vfio_pci_core_aer_err_detected,
};

View file

@ -27,6 +27,7 @@
#define ERROR_CHECK_TIMEOUT 100
#define CHECK_DELAY_TIME 100
#define QM_RESET_WAIT_TIMEOUT 60000
#define QM_SQC_VFT_BASE_SHIFT_V2 28
#define QM_SQC_VFT_BASE_MASK_V2 GENMASK(15, 0)
@ -128,6 +129,7 @@ struct hisi_acc_vf_migration_file {
struct hisi_acc_vf_core_device {
struct vfio_pci_core_device core_device;
u8 match_done;
bool set_reset_flag;
/*
* io_base is only valid when dev_opened is true,
* which is protected by open_mutex.

View file

@ -9,6 +9,7 @@
#include <linux/jiffies.h>
#include <linux/pci-p2pdma.h>
#include <linux/pm_runtime.h>
#include <linux/memory-failure.h>
/*
* The device memory usable to the workloads running in the VM is cached
@ -49,6 +50,7 @@ struct mem_region {
void *memaddr;
void __iomem *ioaddr;
}; /* Base virtual address of the region */
struct pfn_address_space pfn_address_space;
};
struct nvgrace_gpu_pci_core_device {
@ -88,6 +90,80 @@ nvgrace_gpu_memregion(int index,
return NULL;
}
static int pfn_memregion_offset(struct nvgrace_gpu_pci_core_device *nvdev,
unsigned int index,
unsigned long pfn,
pgoff_t *pfn_offset_in_region)
{
struct mem_region *region;
unsigned long start_pfn, num_pages;
region = nvgrace_gpu_memregion(index, nvdev);
if (!region)
return -EINVAL;
start_pfn = PHYS_PFN(region->memphys);
num_pages = region->memlength >> PAGE_SHIFT;
if (pfn < start_pfn || pfn >= start_pfn + num_pages)
return -EFAULT;
*pfn_offset_in_region = pfn - start_pfn;
return 0;
}
static inline
struct nvgrace_gpu_pci_core_device *vma_to_nvdev(struct vm_area_struct *vma);
static int nvgrace_gpu_pfn_to_vma_pgoff(struct vm_area_struct *vma,
unsigned long pfn,
pgoff_t *pgoff)
{
struct nvgrace_gpu_pci_core_device *nvdev;
unsigned int index =
vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
pgoff_t vma_offset_in_region = vma->vm_pgoff &
((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
pgoff_t pfn_offset_in_region;
int ret;
nvdev = vma_to_nvdev(vma);
if (!nvdev)
return -ENOENT;
ret = pfn_memregion_offset(nvdev, index, pfn, &pfn_offset_in_region);
if (ret)
return ret;
/* Ensure PFN is not before VMA's start within the region */
if (pfn_offset_in_region < vma_offset_in_region)
return -EFAULT;
/* Calculate offset from VMA start */
*pgoff = vma->vm_pgoff +
(pfn_offset_in_region - vma_offset_in_region);
return 0;
}
static int
nvgrace_gpu_vfio_pci_register_pfn_range(struct vfio_device *core_vdev,
struct mem_region *region)
{
unsigned long pfn, nr_pages;
pfn = PHYS_PFN(region->memphys);
nr_pages = region->memlength >> PAGE_SHIFT;
region->pfn_address_space.node.start = pfn;
region->pfn_address_space.node.last = pfn + nr_pages - 1;
region->pfn_address_space.mapping = core_vdev->inode->i_mapping;
region->pfn_address_space.pfn_to_vma_pgoff = nvgrace_gpu_pfn_to_vma_pgoff;
return register_pfn_address_space(&region->pfn_address_space);
}
static int nvgrace_gpu_open_device(struct vfio_device *core_vdev)
{
struct vfio_pci_core_device *vdev =
@ -114,14 +190,28 @@ static int nvgrace_gpu_open_device(struct vfio_device *core_vdev)
* memory mapping.
*/
ret = vfio_pci_core_setup_barmap(vdev, 0);
if (ret) {
vfio_pci_core_disable(vdev);
return ret;
if (ret)
goto error_exit;
if (nvdev->resmem.memlength) {
ret = nvgrace_gpu_vfio_pci_register_pfn_range(core_vdev, &nvdev->resmem);
if (ret && ret != -EOPNOTSUPP)
goto error_exit;
}
vfio_pci_core_finish_enable(vdev);
ret = nvgrace_gpu_vfio_pci_register_pfn_range(core_vdev, &nvdev->usemem);
if (ret && ret != -EOPNOTSUPP)
goto register_mem_failed;
vfio_pci_core_finish_enable(vdev);
return 0;
register_mem_failed:
if (nvdev->resmem.memlength)
unregister_pfn_address_space(&nvdev->resmem.pfn_address_space);
error_exit:
vfio_pci_core_disable(vdev);
return ret;
}
static void nvgrace_gpu_close_device(struct vfio_device *core_vdev)
@ -130,6 +220,11 @@ static void nvgrace_gpu_close_device(struct vfio_device *core_vdev)
container_of(core_vdev, struct nvgrace_gpu_pci_core_device,
core_device.vdev);
if (nvdev->resmem.memlength)
unregister_pfn_address_space(&nvdev->resmem.pfn_address_space);
unregister_pfn_address_space(&nvdev->usemem.pfn_address_space);
/* Unmap the mapping to the device memory cached region */
if (nvdev->usemem.memaddr) {
memunmap(nvdev->usemem.memaddr);
@ -247,6 +342,16 @@ static const struct vm_operations_struct nvgrace_gpu_vfio_pci_mmap_ops = {
#endif
};
static inline
struct nvgrace_gpu_pci_core_device *vma_to_nvdev(struct vm_area_struct *vma)
{
/* Check if this VMA belongs to us */
if (vma->vm_ops != &nvgrace_gpu_vfio_pci_mmap_ops)
return NULL;
return vma->vm_private_data;
}
static int nvgrace_gpu_mmap(struct vfio_device *core_vdev,
struct vm_area_struct *vma)
{
@ -784,7 +889,7 @@ nvgrace_gpu_write(struct vfio_device *core_vdev,
static int nvgrace_get_dmabuf_phys(struct vfio_pci_core_device *core_vdev,
struct p2pdma_provider **provider,
unsigned int region_index,
struct dma_buf_phys_vec *phys_vec,
struct phys_vec *phys_vec,
struct vfio_region_dma_range *dma_ranges,
size_t nr_ranges)
{

View file

@ -588,6 +588,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_enable);
void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
{
struct pci_dev *bridge;
struct pci_dev *pdev = vdev->pdev;
struct vfio_pci_dummy_resource *dummy_res, *tmp;
struct vfio_pci_ioeventfd *ioeventfd, *ioeventfd_tmp;
@ -694,12 +695,20 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
* We can not use the "try" reset interface here, which will
* overwrite the previously restored configuration information.
*/
if (vdev->reset_works && pci_dev_trylock(pdev)) {
if (!__pci_reset_function_locked(pdev))
vdev->needs_reset = false;
pci_dev_unlock(pdev);
if (vdev->reset_works) {
bridge = pci_upstream_bridge(pdev);
if (bridge && !pci_dev_trylock(bridge))
goto out_restore_state;
if (pci_dev_trylock(pdev)) {
if (!__pci_reset_function_locked(pdev))
vdev->needs_reset = false;
pci_dev_unlock(pdev);
}
if (bridge)
pci_dev_unlock(bridge);
}
out_restore_state:
pci_restore_state(pdev);
out:
pci_disable_device(pdev);

View file

@ -14,7 +14,7 @@ struct vfio_pci_dma_buf {
struct vfio_pci_core_device *vdev;
struct list_head dmabufs_elm;
size_t size;
struct dma_buf_phys_vec *phys_vec;
struct phys_vec *phys_vec;
struct p2pdma_provider *provider;
u32 nr_ranges;
u8 revoked : 1;
@ -106,7 +106,7 @@ static const struct dma_buf_ops vfio_pci_dmabuf_ops = {
* will fail if it is currently revoked
*/
int vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
struct dma_buf_phys_vec *phys)
struct phys_vec *phys)
{
struct vfio_pci_dma_buf *priv;
@ -128,7 +128,7 @@ int vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
}
EXPORT_SYMBOL_FOR_MODULES(vfio_pci_dma_buf_iommufd_map, "iommufd");
int vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec,
int vfio_pci_core_fill_phys_vec(struct phys_vec *phys_vec,
struct vfio_region_dma_range *dma_ranges,
size_t nr_ranges, phys_addr_t start,
phys_addr_t len)
@ -160,7 +160,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_fill_phys_vec);
int vfio_pci_core_get_dmabuf_phys(struct vfio_pci_core_device *vdev,
struct p2pdma_provider **provider,
unsigned int region_index,
struct dma_buf_phys_vec *phys_vec,
struct phys_vec *phys_vec,
struct vfio_region_dma_range *dma_ranges,
size_t nr_ranges)
{

View file

@ -9,7 +9,7 @@
struct sg_table *dma_buf_phys_vec_to_sgt(struct dma_buf_attachment *attach,
struct p2pdma_provider *provider,
struct dma_buf_phys_vec *phys_vec,
struct phys_vec *phys_vec,
size_t nr_ranges, size_t size,
enum dma_data_direction dir);
void dma_buf_free_sgt(struct dma_buf_attachment *attach, struct sg_table *sgt,

View file

@ -519,16 +519,6 @@ struct dma_buf_export_info {
void *priv;
};
/**
* struct dma_buf_phys_vec - describe continuous chunk of memory
* @paddr: physical address of that chunk
* @len: Length of this chunk
*/
struct dma_buf_phys_vec {
phys_addr_t paddr;
size_t len;
};
/**
* DEFINE_DMA_BUF_EXPORT_INFO - helper macro for exporters
* @name: export-info name

View file

@ -4,8 +4,6 @@
#include <linux/interval_tree.h>
struct pfn_address_space;
struct pfn_address_space {
struct interval_tree_node node;
struct address_space *mapping;
@ -13,7 +11,18 @@ struct pfn_address_space {
unsigned long pfn, pgoff_t *pgoff);
};
#ifdef CONFIG_MEMORY_FAILURE
int register_pfn_address_space(struct pfn_address_space *pfn_space);
void unregister_pfn_address_space(struct pfn_address_space *pfn_space);
#else
static inline int register_pfn_address_space(struct pfn_address_space *pfn_space)
{
return -EOPNOTSUPP;
}
static inline void unregister_pfn_address_space(struct pfn_address_space *pfn_space)
{
}
#endif /* CONFIG_MEMORY_FAILURE */
#endif /* _LINUX_MEMORY_FAILURE_H */

View file

@ -28,7 +28,6 @@
struct vfio_pci_core_device;
struct vfio_pci_region;
struct p2pdma_provider;
struct dma_buf_phys_vec;
struct dma_buf_attachment;
struct vfio_pci_eventfd {
@ -62,25 +61,25 @@ struct vfio_pci_device_ops {
int (*get_dmabuf_phys)(struct vfio_pci_core_device *vdev,
struct p2pdma_provider **provider,
unsigned int region_index,
struct dma_buf_phys_vec *phys_vec,
struct phys_vec *phys_vec,
struct vfio_region_dma_range *dma_ranges,
size_t nr_ranges);
};
#if IS_ENABLED(CONFIG_VFIO_PCI_DMABUF)
int vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec,
int vfio_pci_core_fill_phys_vec(struct phys_vec *phys_vec,
struct vfio_region_dma_range *dma_ranges,
size_t nr_ranges, phys_addr_t start,
phys_addr_t len);
int vfio_pci_core_get_dmabuf_phys(struct vfio_pci_core_device *vdev,
struct p2pdma_provider **provider,
unsigned int region_index,
struct dma_buf_phys_vec *phys_vec,
struct phys_vec *phys_vec,
struct vfio_region_dma_range *dma_ranges,
size_t nr_ranges);
#else
static inline int
vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec,
vfio_pci_core_fill_phys_vec(struct phys_vec *phys_vec,
struct vfio_region_dma_range *dma_ranges,
size_t nr_ranges, phys_addr_t start,
phys_addr_t len)
@ -89,7 +88,7 @@ vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec,
}
static inline int vfio_pci_core_get_dmabuf_phys(
struct vfio_pci_core_device *vdev, struct p2pdma_provider **provider,
unsigned int region_index, struct dma_buf_phys_vec *phys_vec,
unsigned int region_index, struct phys_vec *phys_vec,
struct vfio_region_dma_range *dma_ranges, size_t nr_ranges)
{
return -EOPNOTSUPP;
@ -236,6 +235,6 @@ static inline bool is_aligned_for_order(struct vm_area_struct *vma,
}
int vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
struct dma_buf_phys_vec *phys);
struct phys_vec *phys);
#endif /* VFIO_PCI_CORE_H */

View file

@ -1,5 +1,13 @@
ARCH ?= $(shell uname -m)
ifeq (,$(filter $(ARCH),arm64 x86_64))
# Do nothing on unsupported architectures
include ../lib.mk
else
CFLAGS = $(KHDR_INCLUDES)
TEST_GEN_PROGS += vfio_dma_mapping_test
TEST_GEN_PROGS += vfio_dma_mapping_mmio_test
TEST_GEN_PROGS += vfio_iommufd_setup_test
TEST_GEN_PROGS += vfio_pci_device_test
TEST_GEN_PROGS += vfio_pci_device_init_perf_test
@ -27,3 +35,5 @@ TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_PROGS_O) $(LIBVFIO_O))
-include $(TEST_DEP_FILES)
EXTRA_CLEAN += $(TEST_GEN_PROGS_O) $(TEST_DEP_FILES)
endif

View file

@ -23,4 +23,13 @@
const char *vfio_selftests_get_bdf(int *argc, char *argv[]);
char **vfio_selftests_get_bdfs(int *argc, char *argv[], int *nr_bdfs);
/*
* Reserve virtual address space of size at an address satisfying
* (vaddr % align) == offset.
*
* Returns the reserved vaddr. The caller is responsible for unmapping
* the returned region.
*/
void *mmap_reserve(size_t size, size_t align, size_t offset);
#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_H */

View file

@ -61,6 +61,12 @@ iova_t iommu_hva2iova(struct iommu *iommu, void *vaddr);
struct iommu_iova_range *iommu_iova_ranges(struct iommu *iommu, u32 *nranges);
#define MODE_VFIO_TYPE1_IOMMU "vfio_type1_iommu"
#define MODE_VFIO_TYPE1V2_IOMMU "vfio_type1v2_iommu"
#define MODE_IOMMUFD_COMPAT_TYPE1 "iommufd_compat_type1"
#define MODE_IOMMUFD_COMPAT_TYPE1V2 "iommufd_compat_type1v2"
#define MODE_IOMMUFD "iommufd"
/*
* Generator for VFIO selftests fixture variants that replicate across all
* possible IOMMU modes. Tests must define FIXTURE_VARIANT_ADD_IOMMU_MODE()

View file

@ -20,32 +20,32 @@
#include "../../../kselftest.h"
#include <libvfio.h>
const char *default_iommu_mode = "iommufd";
const char *default_iommu_mode = MODE_IOMMUFD;
/* Reminder: Keep in sync with FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(). */
static const struct iommu_mode iommu_modes[] = {
{
.name = "vfio_type1_iommu",
.name = MODE_VFIO_TYPE1_IOMMU,
.container_path = "/dev/vfio/vfio",
.iommu_type = VFIO_TYPE1_IOMMU,
},
{
.name = "vfio_type1v2_iommu",
.name = MODE_VFIO_TYPE1V2_IOMMU,
.container_path = "/dev/vfio/vfio",
.iommu_type = VFIO_TYPE1v2_IOMMU,
},
{
.name = "iommufd_compat_type1",
.name = MODE_IOMMUFD_COMPAT_TYPE1,
.container_path = "/dev/iommu",
.iommu_type = VFIO_TYPE1_IOMMU,
},
{
.name = "iommufd_compat_type1v2",
.name = MODE_IOMMUFD_COMPAT_TYPE1V2,
.container_path = "/dev/iommu",
.iommu_type = VFIO_TYPE1v2_IOMMU,
},
{
.name = "iommufd",
.name = MODE_IOMMUFD,
},
};

View file

@ -2,6 +2,9 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <linux/align.h>
#include "../../../kselftest.h"
#include <libvfio.h>
@ -76,3 +79,25 @@ const char *vfio_selftests_get_bdf(int *argc, char *argv[])
return vfio_selftests_get_bdfs(argc, argv, &nr_bdfs)[0];
}
void *mmap_reserve(size_t size, size_t align, size_t offset)
{
void *map_base, *map_align;
size_t delta;
VFIO_ASSERT_GT(align, offset);
delta = align - offset;
map_base = mmap(NULL, size + align, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
VFIO_ASSERT_NE(map_base, MAP_FAILED);
map_align = (void *)(ALIGN((uintptr_t)map_base + delta, align) - delta);
if (map_align > map_base)
VFIO_ASSERT_EQ(munmap(map_base, map_align - map_base), 0);
VFIO_ASSERT_EQ(munmap(map_align + size, map_base + align - map_align), 0);
return map_align;
}

View file

@ -11,10 +11,14 @@
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/align.h>
#include <linux/iommufd.h>
#include <linux/kernel.h>
#include <linux/limits.h>
#include <linux/log2.h>
#include <linux/mman.h>
#include <linux/overflow.h>
#include <linux/sizes.h>
#include <linux/types.h>
#include <linux/vfio.h>
@ -123,20 +127,38 @@ static void vfio_pci_region_get(struct vfio_pci_device *device, int index,
static void vfio_pci_bar_map(struct vfio_pci_device *device, int index)
{
struct vfio_pci_bar *bar = &device->bars[index];
size_t align, size;
int prot = 0;
void *vaddr;
VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
VFIO_ASSERT_NULL(bar->vaddr);
VFIO_ASSERT_TRUE(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP);
VFIO_ASSERT_TRUE(is_power_of_2(bar->info.size));
if (bar->info.flags & VFIO_REGION_INFO_FLAG_READ)
prot |= PROT_READ;
if (bar->info.flags & VFIO_REGION_INFO_FLAG_WRITE)
prot |= PROT_WRITE;
bar->vaddr = mmap(NULL, bar->info.size, prot, MAP_FILE | MAP_SHARED,
size = bar->info.size;
/*
* Align BAR mmaps to improve page fault granularity during potential
* subsequent IOMMU mapping of these BAR vaddr. 1G for x86 is the
* largest hugepage size across any architecture, so no benefit from
* larger alignment. BARs smaller than 1G will be aligned by their
* power-of-two size, guaranteeing sufficient alignment for smaller
* hugepages, if present.
*/
align = min_t(size_t, size, SZ_1G);
vaddr = mmap_reserve(size, align, 0);
bar->vaddr = mmap(vaddr, size, prot, MAP_SHARED | MAP_FIXED,
device->fd, bar->info.offset);
VFIO_ASSERT_NE(bar->vaddr, MAP_FAILED);
madvise(bar->vaddr, size, MADV_HUGEPAGE);
}
static void vfio_pci_bar_unmap(struct vfio_pci_device *device, int index)

View file

@ -0,0 +1,143 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>
#include <uapi/linux/types.h>
#include <linux/pci_regs.h>
#include <linux/sizes.h>
#include <linux/vfio.h>
#include <libvfio.h>
#include "../kselftest_harness.h"
static const char *device_bdf;
static struct vfio_pci_bar *largest_mapped_bar(struct vfio_pci_device *device)
{
u32 flags = VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE;
struct vfio_pci_bar *largest = NULL;
u64 bar_size = 0;
for (int i = 0; i < PCI_STD_NUM_BARS; i++) {
struct vfio_pci_bar *bar = &device->bars[i];
if (!bar->vaddr)
continue;
/*
* iommu_map() maps with READ|WRITE, so require the same
* abilities for the underlying VFIO region.
*/
if ((bar->info.flags & flags) != flags)
continue;
if (bar->info.size > bar_size) {
bar_size = bar->info.size;
largest = bar;
}
}
return largest;
}
FIXTURE(vfio_dma_mapping_mmio_test) {
struct iommu *iommu;
struct vfio_pci_device *device;
struct iova_allocator *iova_allocator;
struct vfio_pci_bar *bar;
};
FIXTURE_VARIANT(vfio_dma_mapping_mmio_test) {
const char *iommu_mode;
};
#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode) \
FIXTURE_VARIANT_ADD(vfio_dma_mapping_mmio_test, _iommu_mode) { \
.iommu_mode = #_iommu_mode, \
}
FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES();
#undef FIXTURE_VARIANT_ADD_IOMMU_MODE
FIXTURE_SETUP(vfio_dma_mapping_mmio_test)
{
self->iommu = iommu_init(variant->iommu_mode);
self->device = vfio_pci_device_init(device_bdf, self->iommu);
self->iova_allocator = iova_allocator_init(self->iommu);
self->bar = largest_mapped_bar(self->device);
if (!self->bar)
SKIP(return, "No mappable BAR found on device %s", device_bdf);
}
FIXTURE_TEARDOWN(vfio_dma_mapping_mmio_test)
{
iova_allocator_cleanup(self->iova_allocator);
vfio_pci_device_cleanup(self->device);
iommu_cleanup(self->iommu);
}
static void do_mmio_map_test(struct iommu *iommu,
struct iova_allocator *iova_allocator,
void *vaddr, size_t size)
{
struct dma_region region = {
.vaddr = vaddr,
.size = size,
.iova = iova_allocator_alloc(iova_allocator, size),
};
/*
* NOTE: Check for iommufd compat success once it lands. Native iommufd
* will never support this.
*/
if (!strcmp(iommu->mode->name, MODE_VFIO_TYPE1V2_IOMMU) ||
!strcmp(iommu->mode->name, MODE_VFIO_TYPE1_IOMMU)) {
iommu_map(iommu, &region);
iommu_unmap(iommu, &region);
} else {
VFIO_ASSERT_NE(__iommu_map(iommu, &region), 0);
VFIO_ASSERT_NE(__iommu_unmap(iommu, &region, NULL), 0);
}
}
TEST_F(vfio_dma_mapping_mmio_test, map_full_bar)
{
do_mmio_map_test(self->iommu, self->iova_allocator,
self->bar->vaddr, self->bar->info.size);
}
TEST_F(vfio_dma_mapping_mmio_test, map_partial_bar)
{
if (self->bar->info.size < 2 * getpagesize())
SKIP(return, "BAR too small (size=0x%llx)", self->bar->info.size);
do_mmio_map_test(self->iommu, self->iova_allocator,
self->bar->vaddr, getpagesize());
}
/* Test IOMMU mapping of BAR mmap with intentionally poor vaddr alignment. */
TEST_F(vfio_dma_mapping_mmio_test, map_bar_misaligned)
{
/* Limit size to bound test time for large BARs */
size_t size = min_t(size_t, self->bar->info.size, SZ_1G);
void *vaddr;
vaddr = mmap_reserve(size, SZ_1G, getpagesize());
vaddr = mmap(vaddr, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED,
self->device->fd, self->bar->info.offset);
VFIO_ASSERT_NE(vaddr, MAP_FAILED);
do_mmio_map_test(self->iommu, self->iova_allocator, vaddr, size);
VFIO_ASSERT_EQ(munmap(vaddr, size), 0);
}
int main(int argc, char *argv[])
{
device_bdf = vfio_selftests_get_bdf(&argc, argv);
return test_harness_run(argc, argv);
}

View file

@ -161,12 +161,8 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap)
if (rc == -EOPNOTSUPP)
goto unmap;
/*
* IOMMUFD compatibility-mode does not support huge mappings when
* using VFIO_TYPE1_IOMMU.
*/
if (!strcmp(variant->iommu_mode, "iommufd_compat_type1"))
mapping_size = SZ_4K;
if (self->iommu->mode->iommu_type == VFIO_TYPE1_IOMMU)
goto unmap;
ASSERT_EQ(0, rc);
printf("Found IOMMU mappings for IOVA 0x%lx:\n", region.iova);