Merge branches 'fixes', 'arm/smmu/updates', 'intel/vt-d', 'amd/amd-vi' and 'core' into next
commit ad09563660
49 changed files with 2000 additions and 502 deletions
@@ -2675,6 +2675,15 @@ Kernel parameters

			1 - Bypass the IOMMU for DMA.
			unset - Use value of CONFIG_IOMMU_DEFAULT_PASSTHROUGH.

	iommu.debug_pagealloc=
			[KNL,EARLY] When CONFIG_IOMMU_DEBUG_PAGEALLOC is set, this
			parameter enables the feature at boot time. By default, it
			is disabled and the system behaves the same way as a kernel
			built without CONFIG_IOMMU_DEBUG_PAGEALLOC.
			Format: { "0" | "1" }
			0 - Sanitizer disabled.
			1 - Sanitizer enabled, expect runtime overhead.

	io7=		[HW] IO7 for Marvel-based Alpha systems
			See comment before marvel_specify_io7 in
			arch/alpha/kernel/core_marvel.c.
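As the parameter documentation above notes, the check stays off even in kernels built with CONFIG_IOMMU_DEBUG_PAGEALLOC; a typical way to turn it on is to append the parameter to the boot command line, e.g.:

	iommu.debug_pagealloc=1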
@@ -13251,6 +13251,7 @@ F: drivers/iommu/
F:	include/linux/iommu.h
F:	include/linux/iova.h
F:	include/linux/of_iommu.h
F:	rust/kernel/iommu/

IOMMUFD
M:	Jason Gunthorpe <jgg@nvidia.com>
@@ -384,6 +384,25 @@ config SPRD_IOMMU

	  Say Y here if you want to use the multimedia devices listed above.

config IOMMU_DEBUG_PAGEALLOC
	bool "Debug IOMMU mappings against page allocations"
	depends on DEBUG_PAGEALLOC && IOMMU_API && PAGE_EXTENSION
	help
	  This enables a consistency check between the kernel page allocator and
	  the IOMMU subsystem. It verifies that pages being allocated or freed
	  are not currently mapped in any IOMMU domain.

	  This helps detect DMA use-after-free bugs where a driver frees a page
	  but forgets to unmap it from the IOMMU, potentially allowing a device
	  to overwrite memory that the kernel has repurposed.

	  These checks are best-effort and may not detect all problems.

	  Due to performance overhead, this feature is disabled by default.
	  You must enable "iommu.debug_pagealloc" from the kernel command
	  line to activate the runtime checks.

	  If unsure, say N.

endif # IOMMU_SUPPORT

source "drivers/iommu/generic_pt/Kconfig"
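Conceptually, the consistency check described in the help text pairs a hook on the IOMMU map/unmap paths with a hook on page free. A minimal sketch of the idea follows; every identifier below is a hypothetical stand-in for illustration, not the interface this series actually adds:

	/* Hypothetical per-page counter, e.g. kept via page_ext (sketch only). */
	struct iommu_dbg_ext {
		atomic_t map_count;	/* IOMMU mappings covering this page */
	};

	/* Assumed helper resolving a page to its debug extension (hypothetical). */
	static struct iommu_dbg_ext *iommu_dbg_ext_of(struct page *page);

	static void iommu_dbg_note_map(struct page *page)	/* from the map path */
	{
		atomic_inc(&iommu_dbg_ext_of(page)->map_count);
	}

	static void iommu_dbg_note_unmap(struct page *page)	/* from the unmap path */
	{
		atomic_dec(&iommu_dbg_ext_of(page)->map_count);
	}

	/* Called when DEBUG_PAGEALLOC hands a page back to the allocator. */
	static void iommu_dbg_check_free(struct page *page)
	{
		if (atomic_read(&iommu_dbg_ext_of(page)->map_count))
			pr_warn("page %#lx freed while still IOMMU-mapped\n",
				page_to_pfn(page));
	}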
@@ -36,3 +36,4 @@ obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o
obj-$(CONFIG_IOMMU_IOPF) += io-pgfault.o
obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o
obj-$(CONFIG_APPLE_DART) += apple-dart.o
obj-$(CONFIG_IOMMU_DEBUG_PAGEALLOC) += iommu-debug-pagealloc.o
@@ -30,6 +30,16 @@ config AMD_IOMMU
	  your BIOS for an option to enable it or if you have an IVRS ACPI
	  table.

config AMD_IOMMU_IOMMUFD
	bool "Enable IOMMUFD features for AMD IOMMU (EXPERIMENTAL)"
	depends on IOMMUFD
	depends on AMD_IOMMU
	help
	  Support for IOMMUFD features intended to support virtual machines
	  with accelerated virtual IOMMUs.

	  Say Y here if you are doing development and testing on this feature.

config AMD_IOMMU_DEBUGFS
	bool "Enable AMD IOMMU internals in DebugFS"
	depends on AMD_IOMMU && IOMMU_DEBUGFS
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-y += iommu.o init.o quirks.o ppr.o pasid.o
obj-$(CONFIG_AMD_IOMMU_IOMMUFD) += iommufd.o nested.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
@@ -190,4 +190,37 @@ void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);
struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid);

void amd_iommu_set_dte_v1(struct iommu_dev_data *dev_data,
			  struct protection_domain *domain, u16 domid,
			  struct pt_iommu_amdv1_hw_info *pt_info,
			  struct dev_table_entry *new);
void amd_iommu_update_dte(struct amd_iommu *iommu,
			  struct iommu_dev_data *dev_data,
			  struct dev_table_entry *new);

static inline void
amd_iommu_make_clear_dte(struct iommu_dev_data *dev_data, struct dev_table_entry *new)
{
	struct dev_table_entry *initial_dte;
	struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev);

	/* All existing DTE must have V bit set */
	new->data128[0] = DTE_FLAG_V;
	new->data128[1] = 0;

	/*
	 * Restore cached persistent DTE bits, which can be set by information
	 * in IVRS table. See set_dev_entry_from_acpi().
	 */
	initial_dte = amd_iommu_get_ivhd_dte_flags(iommu->pci_seg->id, dev_data->devid);
	if (initial_dte) {
		new->data128[0] |= initial_dte->data128[0];
		new->data128[1] |= initial_dte->data128[1];
	}
}

/* NESTED */
struct iommu_domain *
amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
			      const struct iommu_user_data *user_data);
#endif /* AMD_IOMMU_H */
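These two helpers fix the driver's DTE update protocol: start from a cleared entry carrying only the V bit plus any persistent IVRS bits, fill in the translation fields, then publish and flush. A sketch of the intended call sequence, matching how set_dte_nested() uses it later in this series:

	struct dev_table_entry new = {};

	amd_iommu_make_clear_dte(dev_data, &new);	/* V bit + persistent IVRS bits */
	/* ... fill in translation fields for the new configuration ... */
	amd_iommu_update_dte(iommu, dev_data, &new);	/* write DTE, clone aliases, flush, wait */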
@@ -17,9 +17,12 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/iommufd.h>
#include <linux/irqreturn.h>
#include <linux/generic_pt/iommu.h>

#include <uapi/linux/iommufd.h>

/*
 * Maximum number of IOMMUs supported
 */

@@ -108,6 +111,7 @@

/* Extended Feature 2 Bits */
#define FEATURE_SEVSNPIO_SUP		BIT_ULL(1)
#define FEATURE_GCR3TRPMODE		BIT_ULL(3)
#define FEATURE_SNPAVICSUP		GENMASK_ULL(7, 5)
#define FEATURE_SNPAVICSUP_GAM(x) \
	(FIELD_GET(FEATURE_SNPAVICSUP, x) == 0x1)

@@ -186,6 +190,7 @@
#define CONTROL_EPH_EN		45
#define CONTROL_XT_EN		50
#define CONTROL_INTCAPXT_EN	51
#define CONTROL_GCR3TRPMODE	58
#define CONTROL_IRTCACHEDIS	59
#define CONTROL_SNPAVIC_EN	61

@@ -350,6 +355,9 @@
#define DTE_FLAG_V		BIT_ULL(0)
#define DTE_FLAG_TV		BIT_ULL(1)
#define DTE_FLAG_HAD		(3ULL << 7)
#define DTE_MODE_MASK		GENMASK_ULL(11, 9)
#define DTE_HOST_TRP		GENMASK_ULL(51, 12)
#define DTE_FLAG_PPR		BIT_ULL(52)
#define DTE_FLAG_GIOV		BIT_ULL(54)
#define DTE_FLAG_GV		BIT_ULL(55)
#define DTE_GLX			GENMASK_ULL(57, 56)

@@ -358,7 +366,7 @@

#define DTE_FLAG_IOTLB		BIT_ULL(32)
#define DTE_FLAG_MASK		(0x3ffULL << 32)
#define DEV_DOMID_MASK		0xffffULL
#define DTE_DOMID_MASK		GENMASK_ULL(15, 0)

#define DTE_GCR3_14_12		GENMASK_ULL(60, 58)
#define DTE_GCR3_30_15		GENMASK_ULL(31, 16)

@@ -493,6 +501,38 @@ struct pdom_iommu_info {
	u32 refcnt;	/* Count of attached dev/pasid per domain/IOMMU */
};

struct amd_iommu_viommu {
	struct iommufd_viommu core;
	struct protection_domain *parent;	/* nest parent domain for this viommu */
	struct list_head pdom_list;		/* For protection_domain->viommu_list */

	/*
	 * Per-vIOMMU guest domain ID to host domain ID mapping.
	 * Indexed by guest domain ID.
	 */
	struct xarray gdomid_array;
};

/*
 * Contains guest domain ID mapping info,
 * which is stored in the struct xarray gdomid_array.
 */
struct guest_domain_mapping_info {
	refcount_t users;
	u32 hdom_id;	/* Host domain ID */
};

/*
 * Nested domain is specifically used for nested translation
 */
struct nested_domain {
	struct iommu_domain domain;	/* generic domain handle used by iommu core code */
	u16 gdom_id;			/* domain ID from gDTE */
	struct guest_domain_mapping_info *gdom_info;
	struct iommu_hwpt_amd_guest gdte;	/* Guest vIOMMU DTE */
	struct amd_iommu_viommu *viommu;	/* AMD hw-viommu this nested domain belongs to */
};

/*
 * This structure contains generic data for IOMMU protection domains
 * independent of their use.

@@ -513,6 +553,12 @@ struct protection_domain {

	struct mmu_notifier mn;			/* mmu notifier for the SVA domain */
	struct list_head dev_data_list;		/* List of pdom_dev_data */

	/*
	 * Store reference to list of vIOMMUs, which use this protection domain.
	 * This will be used to look up host domain ID when flushing this domain.
	 */
	struct list_head viommu_list;
};
PT_IOMMU_CHECK_DOMAIN(struct protection_domain, iommu, domain);
PT_IOMMU_CHECK_DOMAIN(struct protection_domain, amdv1.iommu, domain);

@@ -706,7 +752,7 @@ struct amd_iommu {

	u32 flags;
	volatile u64 *cmd_sem;
	atomic64_t cmd_sem_val;
	u64 cmd_sem_val;
	/*
	 * Track physical address to directly use it in build_completion_wait()
	 * and avoid adding any special checks and handling for kdump.
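The switch from the open-coded DEV_DOMID_MASK (0xffffULL) to DTE_DOMID_MASK as a GENMASK_ULL lets the domain ID be packed and unpacked with the bitfield helpers, as the rest of the series does. For illustration (generic kernel idiom, not a line from this diff):

	u64 data1;
	u16 domid = 42;

	data1 = FIELD_PREP(DTE_DOMID_MASK, domid);		/* pack into bits 15:0 */
	WARN_ON(FIELD_GET(DTE_DOMID_MASK, data1) != domid);	/* round-trips cleanly */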
@@ -1122,6 +1122,14 @@ static void iommu_enable_gt(struct amd_iommu *iommu)
		return;

	iommu_feature_enable(iommu, CONTROL_GT_EN);

	/*
	 * This feature needs to be enabled prior to a call
	 * to iommu_snp_enable(). Since this function is called
	 * in early_enable_iommu(), it is safe to enable here.
	 */
	if (check_feature2(FEATURE_GCR3TRPMODE))
		iommu_feature_enable(iommu, CONTROL_GCR3TRPMODE);
}

/* sets a specific bit in the device table entry. */
@@ -1179,7 +1187,7 @@ static bool __reuse_device_table(struct amd_iommu *iommu)
	for (devid = 0; devid <= pci_seg->last_bdf; devid++) {
		old_dev_tbl_entry = &pci_seg->old_dev_tbl_cpy[devid];
		dte_v = FIELD_GET(DTE_FLAG_V, old_dev_tbl_entry->data[0]);
		dom_id = FIELD_GET(DEV_DOMID_MASK, old_dev_tbl_entry->data[1]);
		dom_id = FIELD_GET(DTE_DOMID_MASK, old_dev_tbl_entry->data[1]);

		if (!dte_v || !dom_id)
			continue;
@@ -1877,7 +1885,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
	iommu->pci_seg = pci_seg;

	raw_spin_lock_init(&iommu->lock);
	atomic64_set(&iommu->cmd_sem_val, 0);
	iommu->cmd_sem_val = 0;

	/* Add IOMMU to internal data structures */
	list_add_tail(&iommu->list, &amd_iommu_list);
@@ -43,6 +43,7 @@
#include <linux/generic_pt/iommu.h>

#include "amd_iommu.h"
#include "iommufd.h"
#include "../irq_remapping.h"
#include "../iommu-pages.h"

@@ -75,6 +76,8 @@ static void set_dte_entry(struct amd_iommu *iommu,
			  struct iommu_dev_data *dev_data,
			  phys_addr_t top_paddr, unsigned int top_level);

static int device_flush_dte(struct iommu_dev_data *dev_data);

static void amd_iommu_change_top(struct pt_iommu *iommu_table,
				 phys_addr_t top_paddr, unsigned int top_level);

@@ -85,6 +88,10 @@ static bool amd_iommu_enforce_cache_coherency(struct iommu_domain *domain);
static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain,
					bool enable);

static void clone_aliases(struct amd_iommu *iommu, struct device *dev);

static int iommu_completion_wait(struct amd_iommu *iommu);

/****************************************************************************
 *
 * Helper functions

@@ -202,6 +209,16 @@ static void update_dte256(struct amd_iommu *iommu, struct iommu_dev_data *dev_da
	spin_unlock_irqrestore(&dev_data->dte_lock, flags);
}

void amd_iommu_update_dte(struct amd_iommu *iommu,
			  struct iommu_dev_data *dev_data,
			  struct dev_table_entry *new)
{
	update_dte256(iommu, dev_data, new);
	clone_aliases(iommu, dev_data->dev);
	device_flush_dte(dev_data);
	iommu_completion_wait(iommu);
}

static void get_dte256(struct amd_iommu *iommu, struct iommu_dev_data *dev_data,
		       struct dev_table_entry *dte)
{
@@ -1185,7 +1202,12 @@ static int wait_on_sem(struct amd_iommu *iommu, u64 data)
{
	int i = 0;

	while (*iommu->cmd_sem != data && i < LOOP_TIMEOUT) {
	/*
	 * cmd_sem holds a monotonically non-decreasing completion sequence
	 * number.
	 */
	while ((__s64)(READ_ONCE(*iommu->cmd_sem) - data) < 0 &&
	       i < LOOP_TIMEOUT) {
		udelay(1);
		i += 1;
	}
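The signed subtraction in the new loop is the standard wraparound-safe way to ask "has the semaphore reached sequence number data yet?", replacing the old exact-equality test. A self-contained illustration of why plain equality or an unsigned compare would break at the 64-bit wrap (generic C, not code from the driver):

	#include <stdint.h>
	#include <assert.h>

	/* True when sequence a is before b, even across a u64 wraparound. */
	static int seq_before(uint64_t a, uint64_t b)
	{
		return (int64_t)(a - b) < 0;
	}

	int main(void)
	{
		assert(seq_before(1, 2));		/* ordinary case */
		assert(seq_before(UINT64_MAX, 0));	/* across the wrap */
		assert(!seq_before(0, UINT64_MAX));
		return 0;
	}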
@@ -1417,6 +1439,12 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
	return iommu_queue_command_sync(iommu, cmd, true);
}

static u64 get_cmdsem_val(struct amd_iommu *iommu)
{
	lockdep_assert_held(&iommu->lock);
	return ++iommu->cmd_sem_val;
}

/*
 * This function queues a completion wait command into the command
 * buffer of an IOMMU
@@ -1431,20 +1459,19 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
	if (!iommu->need_sync)
		return 0;

	data = atomic64_inc_return(&iommu->cmd_sem_val);
	build_completion_wait(&cmd, iommu, data);

	raw_spin_lock_irqsave(&iommu->lock, flags);

	data = get_cmdsem_val(iommu);
	build_completion_wait(&cmd, iommu, data);

	ret = __iommu_queue_command_sync(iommu, &cmd, false);
	raw_spin_unlock_irqrestore(&iommu->lock, flags);

	if (ret)
		goto out_unlock;
	return ret;

	ret = wait_on_sem(iommu, data);

out_unlock:
	raw_spin_unlock_irqrestore(&iommu->lock, flags);

	return ret;
}
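The resequencing here matters: the sequence number must be taken and the completion-wait command queued under the same iommu->lock critical section, otherwise two CPUs could publish commands whose semaphore values are out of order with their queue slots. The shape of the fixed path, in outline (a paraphrase of the code above, not additional driver code):

	raw_spin_lock_irqsave(&iommu->lock, flags);
	data = get_cmdsem_val(iommu);			/* nth ticket ... */
	build_completion_wait(&cmd, iommu, data);
	ret = __iommu_queue_command_sync(iommu, &cmd, false);	/* ... nth slot */
	raw_spin_unlock_irqrestore(&iommu->lock, flags);

	if (!ret)
		ret = wait_on_sem(iommu, data);		/* wait outside the lock */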
@@ -1522,6 +1549,32 @@ static void amd_iommu_flush_tlb_domid(struct amd_iommu *iommu, u32 dom_id)
	iommu_completion_wait(iommu);
}

static int iommu_flush_pages_v1_hdom_ids(struct protection_domain *pdom, u64 address, size_t size)
{
	int ret = 0;
	struct amd_iommu_viommu *aviommu;

	list_for_each_entry(aviommu, &pdom->viommu_list, pdom_list) {
		unsigned long i;
		struct guest_domain_mapping_info *gdom_info;
		struct amd_iommu *iommu = container_of(aviommu->core.iommu_dev,
						       struct amd_iommu, iommu);

		xa_lock(&aviommu->gdomid_array);
		xa_for_each(&aviommu->gdomid_array, i, gdom_info) {
			struct iommu_cmd cmd;

			pr_debug("%s: iommu=%#x, hdom_id=%#x\n", __func__,
				 iommu->devid, gdom_info->hdom_id);
			build_inv_iommu_pages(&cmd, address, size, gdom_info->hdom_id,
					      IOMMU_NO_PASID, false);
			ret |= iommu_queue_command(iommu, &cmd);
		}
		xa_unlock(&aviommu->gdomid_array);
	}
	return ret;
}

static void amd_iommu_flush_all(struct amd_iommu *iommu)
{
	struct iommu_cmd cmd;
@@ -1670,6 +1723,17 @@ static int domain_flush_pages_v1(struct protection_domain *pdom,
		ret |= iommu_queue_command(pdom_iommu_info->iommu, &cmd);
	}

	/*
	 * A domain w/ v1 table can be a nest parent, which can have
	 * multiple nested domains. Each nested domain has a 1:1 mapping
	 * between gDomID and hDomID. Therefore, flush every hDomID
	 * associated with this nest parent domain.
	 *
	 * See drivers/iommu/amd/nested.c: amd_iommu_alloc_domain_nested()
	 */
	if (!list_empty(&pdom->viommu_list))
		ret |= iommu_flush_pages_v1_hdom_ids(pdom, address, size);

	return ret;
}
@@ -2010,127 +2074,112 @@ int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid)
	return ret;
}

static void make_clear_dte(struct iommu_dev_data *dev_data, struct dev_table_entry *ptr,
			   struct dev_table_entry *new)
{
	/* All existing DTE must have V bit set */
	new->data128[0] = DTE_FLAG_V;
	new->data128[1] = 0;
}

/*
 * Note:
 * The old value for GCR3 table and GPT have been cleared from caller.
 */
static void set_dte_gcr3_table(struct amd_iommu *iommu,
			       struct iommu_dev_data *dev_data,
			       struct dev_table_entry *target)
static void set_dte_gcr3_table(struct iommu_dev_data *dev_data,
			       struct dev_table_entry *new)
{
	struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
	u64 gcr3;
	u64 gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl);

	if (!gcr3_info->gcr3_tbl)
		return;
	new->data[0] |= DTE_FLAG_TV |
			(dev_data->ppr ? DTE_FLAG_PPR : 0) |
			(pdom_is_v2_pgtbl_mode(dev_data->domain) ? DTE_FLAG_GIOV : 0) |
			DTE_FLAG_GV |
			FIELD_PREP(DTE_GLX, gcr3_info->glx) |
			FIELD_PREP(DTE_GCR3_14_12, gcr3 >> 12) |
			DTE_FLAG_IR | DTE_FLAG_IW;

	pr_debug("%s: devid=%#x, glx=%#x, gcr3_tbl=%#llx\n",
		 __func__, dev_data->devid, gcr3_info->glx,
		 (unsigned long long)gcr3_info->gcr3_tbl);

	gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl);

	target->data[0] |= DTE_FLAG_GV |
			   FIELD_PREP(DTE_GLX, gcr3_info->glx) |
			   FIELD_PREP(DTE_GCR3_14_12, gcr3 >> 12);
	if (pdom_is_v2_pgtbl_mode(dev_data->domain))
		target->data[0] |= DTE_FLAG_GIOV;

	target->data[1] |= FIELD_PREP(DTE_GCR3_30_15, gcr3 >> 15) |
			   FIELD_PREP(DTE_GCR3_51_31, gcr3 >> 31);
	new->data[1] |= FIELD_PREP(DTE_DOMID_MASK, dev_data->gcr3_info.domid) |
			FIELD_PREP(DTE_GCR3_30_15, gcr3 >> 15) |
			(dev_data->ats_enabled ? DTE_FLAG_IOTLB : 0) |
			FIELD_PREP(DTE_GCR3_51_31, gcr3 >> 31);

	/* Guest page table can only support 4 and 5 levels */
	if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL)
		target->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_5_LEVEL);
		new->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_5_LEVEL);
	else
		target->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_4_LEVEL);
		new->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_4_LEVEL);
}

void amd_iommu_set_dte_v1(struct iommu_dev_data *dev_data,
			  struct protection_domain *domain, u16 domid,
			  struct pt_iommu_amdv1_hw_info *pt_info,
			  struct dev_table_entry *new)
{
	u64 host_pt_root = __sme_set(pt_info->host_pt_root);

	/* Note Dirty tracking is used for v1 table only for now */
	new->data[0] |= DTE_FLAG_TV |
			FIELD_PREP(DTE_MODE_MASK, pt_info->mode) |
			(domain->dirty_tracking ? DTE_FLAG_HAD : 0) |
			FIELD_PREP(DTE_HOST_TRP, host_pt_root >> 12) |
			DTE_FLAG_IR | DTE_FLAG_IW;

	new->data[1] |= FIELD_PREP(DTE_DOMID_MASK, domid) |
			(dev_data->ats_enabled ? DTE_FLAG_IOTLB : 0);
}

static void set_dte_v1(struct iommu_dev_data *dev_data,
		       struct protection_domain *domain, u16 domid,
		       phys_addr_t top_paddr, unsigned int top_level,
		       struct dev_table_entry *new)
{
	struct pt_iommu_amdv1_hw_info pt_info;

	/*
	 * When updating the IO pagetable, the new top and level
	 * are provided as parameters. For other operations i.e.
	 * device attach, retrieve the current pagetable info
	 * via the IOMMU PT API.
	 */
	if (top_paddr) {
		pt_info.host_pt_root = top_paddr;
		pt_info.mode = top_level + 1;
	} else {
		WARN_ON(top_paddr || top_level);
		pt_iommu_amdv1_hw_info(&domain->amdv1, &pt_info);
	}

	amd_iommu_set_dte_v1(dev_data, domain, domid, &pt_info, new);
}

static void set_dte_passthrough(struct iommu_dev_data *dev_data,
				struct protection_domain *domain,
				struct dev_table_entry *new)
{
	new->data[0] |= DTE_FLAG_TV | DTE_FLAG_IR | DTE_FLAG_IW;

	new->data[1] |= FIELD_PREP(DTE_DOMID_MASK, domain->id) |
			(dev_data->ats_enabled) ? DTE_FLAG_IOTLB : 0;
}

static void set_dte_entry(struct amd_iommu *iommu,
			  struct iommu_dev_data *dev_data,
			  phys_addr_t top_paddr, unsigned int top_level)
{
	u16 domid;
	u32 old_domid;
	struct dev_table_entry *initial_dte;
	struct dev_table_entry new = {};
	struct protection_domain *domain = dev_data->domain;
	struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
	struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid];
	struct pt_iommu_amdv1_hw_info pt_info;

	make_clear_dte(dev_data, dte, &new);
	amd_iommu_make_clear_dte(dev_data, &new);

	if (gcr3_info && gcr3_info->gcr3_tbl)
		domid = dev_data->gcr3_info.domid;
	else {
		domid = domain->id;
	old_domid = READ_ONCE(dte->data[1]) & DTE_DOMID_MASK;
	if (gcr3_info->gcr3_tbl)
		set_dte_gcr3_table(dev_data, &new);
	else if (domain->domain.type == IOMMU_DOMAIN_IDENTITY)
		set_dte_passthrough(dev_data, domain, &new);
	else if ((domain->domain.type & __IOMMU_DOMAIN_PAGING) &&
		 domain->pd_mode == PD_MODE_V1)
		set_dte_v1(dev_data, domain, domain->id, top_paddr, top_level, &new);
	else
		WARN_ON(true);

		if (domain->domain.type & __IOMMU_DOMAIN_PAGING) {
			/*
			 * When updating the IO pagetable, the new top and level
			 * are provided as parameters. For other operations i.e.
			 * device attach, retrieve the current pagetable info
			 * via the IOMMU PT API.
			 */
			if (top_paddr) {
				pt_info.host_pt_root = top_paddr;
				pt_info.mode = top_level + 1;
			} else {
				WARN_ON(top_paddr || top_level);
				pt_iommu_amdv1_hw_info(&domain->amdv1,
						       &pt_info);
			}

			new.data[0] |= __sme_set(pt_info.host_pt_root) |
				       (pt_info.mode & DEV_ENTRY_MODE_MASK)
						<< DEV_ENTRY_MODE_SHIFT;
		}
	}

	new.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW;

	/*
	 * When SNP is enabled, we can only support TV=1 with non-zero domain ID.
	 * This is prevented by the SNP-enable and IOMMU_DOMAIN_IDENTITY check in
	 * do_iommu_domain_alloc().
	 */
	WARN_ON(amd_iommu_snp_en && (domid == 0));
	new.data[0] |= DTE_FLAG_TV;

	if (dev_data->ppr)
		new.data[0] |= 1ULL << DEV_ENTRY_PPR;

	if (domain->dirty_tracking)
		new.data[0] |= DTE_FLAG_HAD;

	if (dev_data->ats_enabled)
		new.data[1] |= DTE_FLAG_IOTLB;

	old_domid = READ_ONCE(dte->data[1]) & DEV_DOMID_MASK;
	new.data[1] |= domid;

	/*
	 * Restore cached persistent DTE bits, which can be set by information
	 * in IVRS table. See set_dev_entry_from_acpi().
	 */
	initial_dte = amd_iommu_get_ivhd_dte_flags(iommu->pci_seg->id, dev_data->devid);
	if (initial_dte) {
		new.data128[0] |= initial_dte->data128[0];
		new.data128[1] |= initial_dte->data128[1];
	}

	set_dte_gcr3_table(iommu, dev_data, &new);

	update_dte256(iommu, dev_data, &new);
	amd_iommu_update_dte(iommu, dev_data, &new);

	/*
	 * A kdump kernel might be replacing a domain ID that was copied from
@@ -2148,10 +2197,9 @@ static void set_dte_entry(struct amd_iommu *iommu,
static void clear_dte_entry(struct amd_iommu *iommu, struct iommu_dev_data *dev_data)
{
	struct dev_table_entry new = {};
	struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid];

	make_clear_dte(dev_data, dte, &new);
	update_dte256(iommu, dev_data, &new);
	amd_iommu_make_clear_dte(dev_data, &new);
	amd_iommu_update_dte(iommu, dev_data, &new);
}

/* Update and flush DTE for the given device */
@@ -2163,10 +2211,6 @@ static void dev_update_dte(struct iommu_dev_data *dev_data, bool set)
		set_dte_entry(iommu, dev_data, 0, 0);
	else
		clear_dte_entry(iommu, dev_data);

	clone_aliases(iommu, dev_data->dev);
	device_flush_dte(dev_data);
	iommu_completion_wait(iommu);
}

/*
@@ -2499,6 +2543,7 @@ static void protection_domain_init(struct protection_domain *domain)
	spin_lock_init(&domain->lock);
	INIT_LIST_HEAD(&domain->dev_list);
	INIT_LIST_HEAD(&domain->dev_data_list);
	INIT_LIST_HEAD(&domain->viommu_list);
	xa_init(&domain->iommu_array);
}
@@ -2760,6 +2805,14 @@ static struct iommu_domain *amd_iommu_domain_alloc_paging_v2(struct device *dev,
	return &domain->domain;
}

static inline bool is_nest_parent_supported(u32 flags)
{
	/* Only allow nest parent when these features are supported */
	return check_feature(FEATURE_GT) &&
	       check_feature(FEATURE_GIOSUP) &&
	       check_feature2(FEATURE_GCR3TRPMODE);
}

static struct iommu_domain *
amd_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
				    const struct iommu_user_data *user_data)

@@ -2767,16 +2820,28 @@
{
	struct amd_iommu *iommu = get_amd_iommu_from_dev(dev);
	const u32 supported_flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
				    IOMMU_HWPT_ALLOC_PASID;
				    IOMMU_HWPT_ALLOC_PASID |
				    IOMMU_HWPT_ALLOC_NEST_PARENT;

	if ((flags & ~supported_flags) || user_data)
		return ERR_PTR(-EOPNOTSUPP);

	switch (flags & supported_flags) {
	case IOMMU_HWPT_ALLOC_DIRTY_TRACKING:
		/* Allocate domain with v1 page table for dirty tracking */
		if (!amd_iommu_hd_support(iommu))
	case IOMMU_HWPT_ALLOC_NEST_PARENT:
	case IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_NEST_PARENT:
		/*
		 * Allocate domain with v1 page table for dirty tracking
		 * and/or Nest parent.
		 */
		if ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) &&
		    !amd_iommu_hd_support(iommu))
			break;

		if ((flags & IOMMU_HWPT_ALLOC_NEST_PARENT) &&
		    !is_nest_parent_supported(flags))
			break;

		return amd_iommu_domain_alloc_paging_v1(dev, flags);
	case IOMMU_HWPT_ALLOC_PASID:
		/* Allocate domain with v2 page table if IOMMU supports PASID. */
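From userspace, the new IOMMU_HWPT_ALLOC_NEST_PARENT capability is exercised through the iommufd HWPT-allocation ioctl. A rough sketch of how a VMM might request a nest parent on this driver — field usage follows the iommufd uAPI as I understand it, and iommufd_fd/dev_id/ioas_id setup plus error handling are assumed to exist elsewhere:

	#include <sys/ioctl.h>
	#include <linux/iommufd.h>

	/* Sketch only: allocate a nest-parent HWPT; returns 0 on failure. */
	static __u32 alloc_nest_parent(int iommufd_fd, __u32 dev_id, __u32 ioas_id)
	{
		struct iommu_hwpt_alloc cmd = {
			.size = sizeof(cmd),
			.flags = IOMMU_HWPT_ALLOC_NEST_PARENT,
			.dev_id = dev_id,	/* device bound to iommufd */
			.pt_id = ioas_id,	/* backing IOAS for the S2 table */
		};

		/* Fails with EOPNOTSUPP when the IOMMU lacks the features above. */
		if (ioctl(iommufd_fd, IOMMU_HWPT_ALLOC, &cmd))
			return 0;
		return cmd.out_hwpt_id;
	}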
@@ -3078,6 +3143,7 @@ static bool amd_iommu_enforce_cache_coherency(struct iommu_domain *domain)

const struct iommu_ops amd_iommu_ops = {
	.capable = amd_iommu_capable,
	.hw_info = amd_iommufd_hw_info,
	.blocked_domain = &blocked_domain,
	.release_domain = &blocked_domain,
	.identity_domain = &identity_domain.domain,
@@ -3090,6 +3156,8 @@ const struct iommu_ops amd_iommu_ops = {
	.is_attach_deferred = amd_iommu_is_attach_deferred,
	.def_domain_type = amd_iommu_def_domain_type,
	.page_response = amd_iommu_page_response,
	.get_viommu_size = amd_iommufd_get_viommu_size,
	.viommu_init = amd_iommufd_viommu_init,
};

#ifdef CONFIG_IRQ_REMAP
@@ -3114,18 +3182,23 @@ static void iommu_flush_irt_and_complete(struct amd_iommu *iommu, u16 devid)
		return;

	build_inv_irt(&cmd, devid);
	data = atomic64_inc_return(&iommu->cmd_sem_val);
	build_completion_wait(&cmd2, iommu, data);

	raw_spin_lock_irqsave(&iommu->lock, flags);
	data = get_cmdsem_val(iommu);
	build_completion_wait(&cmd2, iommu, data);

	ret = __iommu_queue_command_sync(iommu, &cmd, true);
	if (ret)
		goto out;
		goto out_err;
	ret = __iommu_queue_command_sync(iommu, &cmd2, false);
	if (ret)
		goto out;
		goto out_err;
	raw_spin_unlock_irqrestore(&iommu->lock, flags);

	wait_on_sem(iommu, data);
out:
	return;

out_err:
	raw_spin_unlock_irqrestore(&iommu->lock, flags);
}
@@ -3239,7 +3312,7 @@ static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu,
	struct irq_remap_table *new_table = NULL;
	struct amd_iommu_pci_seg *pci_seg;
	unsigned long flags;
	int nid = iommu && iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
	int nid = iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
	u16 alias;

	spin_lock_irqsave(&iommu_table_lock, flags);
drivers/iommu/amd/iommufd.c (new file, 77 lines)
@@ -0,0 +1,77 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2025 Advanced Micro Devices, Inc.
 */

#include <linux/iommu.h>

#include "iommufd.h"
#include "amd_iommu.h"
#include "amd_iommu_types.h"

static const struct iommufd_viommu_ops amd_viommu_ops;

void *amd_iommufd_hw_info(struct device *dev, u32 *length, enum iommu_hw_info_type *type)
{
	struct iommu_hw_info_amd *hwinfo;

	if (*type != IOMMU_HW_INFO_TYPE_DEFAULT &&
	    *type != IOMMU_HW_INFO_TYPE_AMD)
		return ERR_PTR(-EOPNOTSUPP);

	hwinfo = kzalloc(sizeof(*hwinfo), GFP_KERNEL);
	if (!hwinfo)
		return ERR_PTR(-ENOMEM);

	*length = sizeof(*hwinfo);
	*type = IOMMU_HW_INFO_TYPE_AMD;

	hwinfo->efr = amd_iommu_efr;
	hwinfo->efr2 = amd_iommu_efr2;

	return hwinfo;
}

size_t amd_iommufd_get_viommu_size(struct device *dev, enum iommu_viommu_type viommu_type)
{
	return VIOMMU_STRUCT_SIZE(struct amd_iommu_viommu, core);
}

int amd_iommufd_viommu_init(struct iommufd_viommu *viommu, struct iommu_domain *parent,
			    const struct iommu_user_data *user_data)
{
	unsigned long flags;
	struct protection_domain *pdom = to_pdomain(parent);
	struct amd_iommu_viommu *aviommu = container_of(viommu, struct amd_iommu_viommu, core);

	xa_init_flags(&aviommu->gdomid_array, XA_FLAGS_ALLOC1);
	aviommu->parent = pdom;

	viommu->ops = &amd_viommu_ops;

	spin_lock_irqsave(&pdom->lock, flags);
	list_add(&aviommu->pdom_list, &pdom->viommu_list);
	spin_unlock_irqrestore(&pdom->lock, flags);

	return 0;
}

static void amd_iommufd_viommu_destroy(struct iommufd_viommu *viommu)
{
	unsigned long flags;
	struct amd_iommu_viommu *aviommu = container_of(viommu, struct amd_iommu_viommu, core);
	struct protection_domain *pdom = aviommu->parent;

	spin_lock_irqsave(&pdom->lock, flags);
	list_del(&aviommu->pdom_list);
	spin_unlock_irqrestore(&pdom->lock, flags);
	xa_destroy(&aviommu->gdomid_array);
}

/*
 * See include/linux/iommufd.h
 * struct iommufd_viommu_ops - vIOMMU specific operations
 */
static const struct iommufd_viommu_ops amd_viommu_ops = {
	.destroy = amd_iommufd_viommu_destroy,
};
drivers/iommu/amd/iommufd.h (new file, 20 lines)
@@ -0,0 +1,20 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2025 Advanced Micro Devices, Inc.
 */

#ifndef AMD_IOMMUFD_H
#define AMD_IOMMUFD_H

#if IS_ENABLED(CONFIG_AMD_IOMMU_IOMMUFD)
void *amd_iommufd_hw_info(struct device *dev, u32 *length, enum iommu_hw_info_type *type);
size_t amd_iommufd_get_viommu_size(struct device *dev, enum iommu_viommu_type viommu_type);
int amd_iommufd_viommu_init(struct iommufd_viommu *viommu, struct iommu_domain *parent,
			    const struct iommu_user_data *user_data);
#else
#define amd_iommufd_hw_info NULL
#define amd_iommufd_viommu_init NULL
#define amd_iommufd_get_viommu_size NULL
#endif /* CONFIG_AMD_IOMMU_IOMMUFD */

#endif /* AMD_IOMMUFD_H */
drivers/iommu/amd/nested.c (new file, 294 lines)
@@ -0,0 +1,294 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2025 Advanced Micro Devices, Inc.
 */

#define dev_fmt(fmt)	"AMD-Vi: " fmt

#include <linux/iommu.h>
#include <linux/refcount.h>
#include <uapi/linux/iommufd.h>

#include "amd_iommu.h"

static const struct iommu_domain_ops nested_domain_ops;

static inline struct nested_domain *to_ndomain(struct iommu_domain *dom)
{
	return container_of(dom, struct nested_domain, domain);
}

/*
 * Validate guest DTE to make sure that configuration for host (v1)
 * and guest (v2) page tables are valid when allocating nested domain.
 */
static int validate_gdte_nested(struct iommu_hwpt_amd_guest *gdte)
{
	u32 gpt_level = FIELD_GET(DTE_GPT_LEVEL_MASK, gdte->dte[2]);

	/* Must be zero: Mode, Host-TPR */
	if (FIELD_GET(DTE_MODE_MASK, gdte->dte[0]) != 0 ||
	    FIELD_GET(DTE_HOST_TRP, gdte->dte[0]) != 0)
		return -EINVAL;

	/* GCR3 TRP must be non-zero if V, GV is set */
	if (FIELD_GET(DTE_FLAG_V, gdte->dte[0]) == 1 &&
	    FIELD_GET(DTE_FLAG_GV, gdte->dte[0]) == 1 &&
	    FIELD_GET(DTE_GCR3_14_12, gdte->dte[0]) == 0 &&
	    FIELD_GET(DTE_GCR3_30_15, gdte->dte[1]) == 0 &&
	    FIELD_GET(DTE_GCR3_51_31, gdte->dte[1]) == 0)
		return -EINVAL;

	/* Valid Guest Paging Mode values are 0 and 1 */
	if (gpt_level != GUEST_PGTABLE_4_LEVEL &&
	    gpt_level != GUEST_PGTABLE_5_LEVEL)
		return -EINVAL;

	/* GLX = 3 is reserved */
	if (FIELD_GET(DTE_GLX, gdte->dte[0]) == 3)
		return -EINVAL;

	/*
	 * We need to check host capability before setting
	 * the Guest Paging Mode
	 */
	if (gpt_level == GUEST_PGTABLE_5_LEVEL &&
	    amd_iommu_gpt_level < PAGE_MODE_5_LEVEL)
		return -EOPNOTSUPP;

	return 0;
}

static void *gdom_info_load_or_alloc_locked(struct xarray *xa, unsigned long index)
{
	struct guest_domain_mapping_info *elm, *res;

	elm = xa_load(xa, index);
	if (elm)
		return elm;

	xa_unlock(xa);
	elm = kzalloc(sizeof(struct guest_domain_mapping_info), GFP_KERNEL);
	xa_lock(xa);
	if (!elm)
		return ERR_PTR(-ENOMEM);

	res = __xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
	if (xa_is_err(res))
		res = ERR_PTR(xa_err(res));

	if (res) {
		kfree(elm);
		return res;
	}

	refcount_set(&elm->users, 0);
	return elm;
}

/*
 * This function is assigned to struct iommufd_viommu_ops.alloc_domain_nested()
 * during the call to struct iommu_ops.viommu_init().
 */
struct iommu_domain *
amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
			      const struct iommu_user_data *user_data)
{
	int ret;
	struct nested_domain *ndom;
	struct guest_domain_mapping_info *gdom_info;
	struct amd_iommu_viommu *aviommu = container_of(viommu, struct amd_iommu_viommu, core);

	if (user_data->type != IOMMU_HWPT_DATA_AMD_GUEST)
		return ERR_PTR(-EOPNOTSUPP);

	ndom = kzalloc(sizeof(*ndom), GFP_KERNEL);
	if (!ndom)
		return ERR_PTR(-ENOMEM);

	ret = iommu_copy_struct_from_user(&ndom->gdte, user_data,
					  IOMMU_HWPT_DATA_AMD_GUEST,
					  dte);
	if (ret)
		goto out_err;

	ret = validate_gdte_nested(&ndom->gdte);
	if (ret)
		goto out_err;

	ndom->gdom_id = FIELD_GET(DTE_DOMID_MASK, ndom->gdte.dte[1]);
	ndom->domain.ops = &nested_domain_ops;
	ndom->domain.type = IOMMU_DOMAIN_NESTED;
	ndom->viommu = aviommu;

	/*
	 * Normally, when a guest has multiple pass-through devices,
	 * the IOMMU driver sets up DTEs with the same stage-2 table and
	 * uses the same host domain ID (hDomID). In case of nested translation,
	 * if the guest sets up different stage-1 tables with the same PASID,
	 * the IOMMU would use the same TLB tag. This results in a TLB
	 * aliasing issue.
	 *
	 * The guest assigns gDomIDs based on its own algorithm for managing
	 * cache tags of (DomID, PASID). Within a single viommu, the nest parent domain
	 * (w/ S2 table) is used by all DTEs. But we need to consistently map the gDomID
	 * to a single hDomID. This is done using an xarray in the vIOMMU to
	 * keep track of the gDomID mapping. When the S2 is changed, the INVALIDATE_IOMMU_PAGES
	 * command must be issued for each hDomID in the xarray.
	 */
	xa_lock(&aviommu->gdomid_array);

	gdom_info = gdom_info_load_or_alloc_locked(&aviommu->gdomid_array, ndom->gdom_id);
	if (IS_ERR(gdom_info)) {
		xa_unlock(&aviommu->gdomid_array);
		ret = PTR_ERR(gdom_info);
		goto out_err;
	}

	/* Check if the gDomID already exists */
	if (refcount_inc_not_zero(&gdom_info->users)) {
		ndom->gdom_info = gdom_info;
		xa_unlock(&aviommu->gdomid_array);

		pr_debug("%s: Found gdom_id=%#x, hdom_id=%#x\n",
			 __func__, ndom->gdom_id, gdom_info->hdom_id);

		return &ndom->domain;
	}

	/* The gDomID does not exist. Allocate a new hdom_id */
	gdom_info->hdom_id = amd_iommu_pdom_id_alloc();
	if (gdom_info->hdom_id <= 0) {
		__xa_cmpxchg(&aviommu->gdomid_array,
			     ndom->gdom_id, gdom_info, NULL, GFP_ATOMIC);
		xa_unlock(&aviommu->gdomid_array);
		ret = -ENOSPC;
		goto out_err_gdom_info;
	}

	ndom->gdom_info = gdom_info;
	refcount_set(&gdom_info->users, 1);

	xa_unlock(&aviommu->gdomid_array);

	pr_debug("%s: Allocate gdom_id=%#x, hdom_id=%#x\n",
		 __func__, ndom->gdom_id, gdom_info->hdom_id);

	return &ndom->domain;

out_err_gdom_info:
	kfree(gdom_info);
out_err:
	kfree(ndom);
	return ERR_PTR(ret);
}

static void set_dte_nested(struct amd_iommu *iommu, struct iommu_domain *dom,
			   struct iommu_dev_data *dev_data, struct dev_table_entry *new)
{
	struct protection_domain *parent;
	struct nested_domain *ndom = to_ndomain(dom);
	struct iommu_hwpt_amd_guest *gdte = &ndom->gdte;
	struct pt_iommu_amdv1_hw_info pt_info;

	/*
	 * The nest parent domain is attached during the call to the
	 * struct iommu_ops.viommu_init(), which will be stored as part
	 * of the struct amd_iommu_viommu.parent.
	 */
	if (WARN_ON(!ndom->viommu || !ndom->viommu->parent))
		return;

	parent = ndom->viommu->parent;
	amd_iommu_make_clear_dte(dev_data, new);

	/* Retrieve the current pagetable info via the IOMMU PT API. */
	pt_iommu_amdv1_hw_info(&parent->amdv1, &pt_info);

	/*
	 * Use domain ID from nested domain to program DTE.
	 * See amd_iommu_alloc_domain_nested().
	 */
	amd_iommu_set_dte_v1(dev_data, parent, ndom->gdom_info->hdom_id,
			     &pt_info, new);

	/* GV is required for nested page table */
	new->data[0] |= DTE_FLAG_GV;

	/* Guest PPR */
	new->data[0] |= gdte->dte[0] & DTE_FLAG_PPR;

	/* Guest translation stuff */
	new->data[0] |= gdte->dte[0] & (DTE_GLX | DTE_FLAG_GIOV);

	/* GCR3 table */
	new->data[0] |= gdte->dte[0] & DTE_GCR3_14_12;
	new->data[1] |= gdte->dte[1] & (DTE_GCR3_30_15 | DTE_GCR3_51_31);

	/* Guest paging mode */
	new->data[2] |= gdte->dte[2] & DTE_GPT_LEVEL_MASK;
}

static int nested_attach_device(struct iommu_domain *dom, struct device *dev,
				struct iommu_domain *old)
{
	struct dev_table_entry new = {0};
	struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
	struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
	int ret = 0;

	/*
	 * Needs to make sure PASID is not enabled
	 * for this attach path.
	 */
	if (WARN_ON(dev_data->pasid_enabled))
		return -EINVAL;

	mutex_lock(&dev_data->mutex);

	set_dte_nested(iommu, dom, dev_data, &new);

	amd_iommu_update_dte(iommu, dev_data, &new);

	mutex_unlock(&dev_data->mutex);

	return ret;
}

static void nested_domain_free(struct iommu_domain *dom)
{
	struct guest_domain_mapping_info *curr;
	struct nested_domain *ndom = to_ndomain(dom);
	struct amd_iommu_viommu *aviommu = ndom->viommu;

	xa_lock(&aviommu->gdomid_array);

	if (!refcount_dec_and_test(&ndom->gdom_info->users)) {
		xa_unlock(&aviommu->gdomid_array);
		return;
	}

	/*
	 * The refcount for the gdom_id to hdom_id mapping is zero.
	 * It is now safe to remove the mapping.
	 */
	curr = __xa_cmpxchg(&aviommu->gdomid_array, ndom->gdom_id,
			    ndom->gdom_info, NULL, GFP_ATOMIC);

	xa_unlock(&aviommu->gdomid_array);
	if (WARN_ON(!curr || xa_err(curr)))
		return;

	/* success */
	pr_debug("%s: Free gdom_id=%#x, hdom_id=%#x\n",
		 __func__, ndom->gdom_id, curr->hdom_id);

	amd_iommu_pdom_id_free(ndom->gdom_info->hdom_id);
	kfree(curr);
	kfree(ndom);
}

static const struct iommu_domain_ops nested_domain_ops = {
	.attach_dev = nested_attach_device,
	.free = nested_domain_free,
};
@@ -121,7 +121,6 @@ config ARM_SMMU_V3_KUNIT_TEST

config TEGRA241_CMDQV
	bool "NVIDIA Tegra241 CMDQ-V extension support for ARM SMMUv3"
	depends on ACPI
	help
	  Support for NVIDIA CMDQ-Virtualization extension for ARM SMMUv3. The
	  CMDQ-V extension is similar to v3.3 ECMDQ for multi command queues
@@ -177,7 +177,9 @@ static int arm_smmu_attach_dev_nested(struct iommu_domain *domain,
	 * config bit here base this off the EATS value in the STE. If the EATS
	 * is set then the VM must generate ATC flushes.
	 */
	state.disable_ats = !nested_domain->enable_ats;
	if (FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(nested_domain->ste[0])) ==
	    STRTAB_STE_0_CFG_S1_TRANS)
		state.disable_ats = !nested_domain->enable_ats;
	ret = arm_smmu_attach_prepare(&state, domain);
	if (ret) {
		mutex_unlock(&arm_smmu_asid_lock);
@@ -33,18 +33,25 @@ static struct mm_struct sva_mm = {
enum arm_smmu_test_master_feat {
	ARM_SMMU_MASTER_TEST_ATS = BIT(0),
	ARM_SMMU_MASTER_TEST_STALL = BIT(1),
	ARM_SMMU_MASTER_TEST_NESTED = BIT(2),
};

static void arm_smmu_test_make_s2_ste(struct arm_smmu_ste *ste,
				      enum arm_smmu_test_master_feat feat);

static bool arm_smmu_entry_differs_in_used_bits(const __le64 *entry,
						const __le64 *used_bits,
						const __le64 *target,
						const __le64 *safe,
						unsigned int length)
{
	bool differs = false;
	unsigned int i;

	for (i = 0; i < length; i++) {
		if ((entry[i] & used_bits[i]) != target[i])
		__le64 used = used_bits[i] & ~safe[i];

		if ((entry[i] & used) != (target[i] & used))
			differs = true;
	}
	return differs;
@@ -56,12 +63,24 @@
	struct arm_smmu_test_writer *test_writer =
		container_of(writer, struct arm_smmu_test_writer, writer);
	__le64 *entry_used_bits;
	__le64 *safe_target;
	__le64 *safe_init;

	entry_used_bits = kunit_kzalloc(
		test_writer->test, sizeof(*entry_used_bits) * NUM_ENTRY_QWORDS,
		GFP_KERNEL);
	KUNIT_ASSERT_NOT_NULL(test_writer->test, entry_used_bits);

	safe_target = kunit_kzalloc(test_writer->test,
				    sizeof(*safe_target) * NUM_ENTRY_QWORDS,
				    GFP_KERNEL);
	KUNIT_ASSERT_NOT_NULL(test_writer->test, safe_target);

	safe_init = kunit_kzalloc(test_writer->test,
				  sizeof(*safe_init) * NUM_ENTRY_QWORDS,
				  GFP_KERNEL);
	KUNIT_ASSERT_NOT_NULL(test_writer->test, safe_init);

	pr_debug("STE value is now set to: ");
	print_hex_dump_debug("  ", DUMP_PREFIX_NONE, 16, 8,
			     test_writer->entry,
@@ -79,14 +98,23 @@ arm_smmu_test_writer_record_syncs(struct arm_smmu_entry_writer *writer)
		 * configuration.
		 */
		writer->ops->get_used(test_writer->entry, entry_used_bits);
		if (writer->ops->get_update_safe)
			writer->ops->get_update_safe(test_writer->entry,
						     test_writer->init_entry,
						     safe_init);
		if (writer->ops->get_update_safe)
			writer->ops->get_update_safe(test_writer->entry,
						     test_writer->target_entry,
						     safe_target);
		KUNIT_EXPECT_FALSE(
			test_writer->test,
			arm_smmu_entry_differs_in_used_bits(
				test_writer->entry, entry_used_bits,
				test_writer->init_entry, NUM_ENTRY_QWORDS) &&
				test_writer->init_entry, safe_init,
				NUM_ENTRY_QWORDS) &&
				arm_smmu_entry_differs_in_used_bits(
					test_writer->entry, entry_used_bits,
					test_writer->target_entry,
					test_writer->target_entry, safe_target,
					NUM_ENTRY_QWORDS));
	}
}
@@ -106,6 +134,7 @@ arm_smmu_v3_test_debug_print_used_bits(struct arm_smmu_entry_writer *writer,
static const struct arm_smmu_entry_writer_ops test_ste_ops = {
	.sync = arm_smmu_test_writer_record_syncs,
	.get_used = arm_smmu_get_ste_used,
	.get_update_safe = arm_smmu_get_ste_update_safe,
};

static const struct arm_smmu_entry_writer_ops test_cd_ops = {
@@ -185,6 +214,18 @@ static void arm_smmu_test_make_cdtable_ste(struct arm_smmu_ste *ste,
	};

	arm_smmu_make_cdtable_ste(ste, &master, ats_enabled, s1dss);
	if (feat & ARM_SMMU_MASTER_TEST_NESTED) {
		struct arm_smmu_ste s2ste;
		int i;

		arm_smmu_test_make_s2_ste(&s2ste,
					  feat & ~ARM_SMMU_MASTER_TEST_NESTED);
		ste->data[0] |= cpu_to_le64(
			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_NESTED));
		ste->data[1] |= cpu_to_le64(STRTAB_STE_1_MEV);
		for (i = 2; i < NUM_ENTRY_QWORDS; i++)
			ste->data[i] = s2ste.data[i];
	}
}

static void arm_smmu_v3_write_ste_test_bypass_to_abort(struct kunit *test)
@@ -542,6 +583,35 @@ static void arm_smmu_v3_write_ste_test_s2_to_s1_stall(struct kunit *test)
					       NUM_EXPECTED_SYNCS(3));
}

static void
arm_smmu_v3_write_ste_test_nested_s1dssbypass_to_s1bypass(struct kunit *test)
{
	struct arm_smmu_ste s1_ste;
	struct arm_smmu_ste s2_ste;

	arm_smmu_test_make_cdtable_ste(
		&s1_ste, STRTAB_STE_1_S1DSS_BYPASS, fake_cdtab_dma_addr,
		ARM_SMMU_MASTER_TEST_ATS | ARM_SMMU_MASTER_TEST_NESTED);
	arm_smmu_test_make_s2_ste(&s2_ste, 0);
	/* Expect an additional sync to unset ignored bits: EATS and MEV */
	arm_smmu_v3_test_ste_expect_hitless_transition(test, &s1_ste, &s2_ste,
						       NUM_EXPECTED_SYNCS(3));
}

static void
arm_smmu_v3_write_ste_test_nested_s1bypass_to_s1dssbypass(struct kunit *test)
{
	struct arm_smmu_ste s1_ste;
	struct arm_smmu_ste s2_ste;

	arm_smmu_test_make_cdtable_ste(
		&s1_ste, STRTAB_STE_1_S1DSS_BYPASS, fake_cdtab_dma_addr,
		ARM_SMMU_MASTER_TEST_ATS | ARM_SMMU_MASTER_TEST_NESTED);
	arm_smmu_test_make_s2_ste(&s2_ste, 0);
	arm_smmu_v3_test_ste_expect_hitless_transition(test, &s2_ste, &s1_ste,
						       NUM_EXPECTED_SYNCS(2));
}

static void arm_smmu_v3_write_cd_test_sva_clear(struct kunit *test)
{
	struct arm_smmu_cd cd = {};
@@ -588,6 +658,8 @@ static struct kunit_case arm_smmu_v3_test_cases[] = {
	KUNIT_CASE(arm_smmu_v3_write_cd_test_s1_change_asid),
	KUNIT_CASE(arm_smmu_v3_write_ste_test_s1_to_s2_stall),
	KUNIT_CASE(arm_smmu_v3_write_ste_test_s2_to_s1_stall),
	KUNIT_CASE(arm_smmu_v3_write_ste_test_nested_s1dssbypass_to_s1bypass),
	KUNIT_CASE(arm_smmu_v3_write_ste_test_nested_s1bypass_to_s1dssbypass),
	KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_clear),
	KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_release),
	{},
@@ -487,20 +487,26 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 */
static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
{
	int val;

	/*
	 * We can try to avoid the cmpxchg() loop by simply incrementing the
	 * lock counter. When held in exclusive state, the lock counter is set
	 * to INT_MIN so these increments won't hurt as the value will remain
	 * negative.
	 * When held in exclusive state, the lock counter is set to INT_MIN
	 * so these increments won't hurt as the value will remain negative.
	 * The increment will also signal the exclusive locker that there are
	 * shared waiters.
	 */
	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
		return;

	do {
		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
	/*
	 * Someone else is holding the lock in exclusive state, so wait
	 * for them to finish. Since we already incremented the lock counter,
	 * no exclusive lock can be acquired until we finish. We don't need
	 * the return value since we only care that the exclusive lock is
	 * released (i.e. the lock counter is non-negative).
	 * Once the exclusive locker releases the lock, the sign bit will
	 * be cleared and our increment will make the lock counter positive,
	 * allowing us to proceed.
	 */
	atomic_cond_read_relaxed(&cmdq->lock, VAL > 0);
}

static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
@@ -527,9 +533,14 @@ static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
	__ret;								\
})

/*
 * Only clear the sign bit when releasing the exclusive lock; this will
 * allow any shared_lock() waiters to proceed without the possibility
 * of entering the exclusive lock in a tight loop.
 */
#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
({									\
	atomic_set_release(&cmdq->lock, 0);				\
	atomic_fetch_andnot_release(INT_MIN, &cmdq->lock);		\
	local_irq_restore(flags);					\
})
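The scheme here is a readers-writer lock built on a single atomic counter: shared holders add 1, the exclusive holder owns the sign bit (INT_MIN), and the new release path clears only the sign bit so increments from shared waiters survive the handover. A reduced userspace model of the idea — illustrative only, using C11 atomics instead of the kernel's helpers, with the exclusive-acquire path elided:

	#include <stdatomic.h>
	#include <limits.h>

	static atomic_int lock;	/* > 0: shared holders; sign bit: exclusive held */

	static void shared_lock(void)
	{
		/* Our increment both registers us and blocks new exclusive holders. */
		if (atomic_fetch_add(&lock, 1) >= 0)
			return;
		/* Exclusive holder present: spin until it clears the sign bit. */
		while (atomic_load(&lock) <= 0)
			;
	}

	static void shared_unlock(void)
	{
		atomic_fetch_sub(&lock, 1);
	}

	static void exclusive_unlock(void)
	{
		/*
		 * Clear only the sign bit: pending shared increments are
		 * preserved, so those waiters proceed without re-contending.
		 */
		atomic_fetch_and(&lock, ~INT_MIN);
	}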
@@ -1082,6 +1093,49 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_used);

VISIBLE_IF_KUNIT
void arm_smmu_get_ste_update_safe(const __le64 *cur, const __le64 *target,
				  __le64 *safe_bits)
{
	const __le64 eats_s1chk =
		FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_S1CHK);
	const __le64 eats_trans =
		FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS);

	/*
	 * When an STE changes EATS_TRANS, the sequencing code in the attach
	 * logic already will have the PCI cap for ATS disabled. Thus at this
	 * moment we can expect that the device will not generate ATS queries
	 * and so we don't care about the sequencing of EATS. The purpose of
	 * EATS_TRANS is to protect the system from hostile untrusted devices
	 * that issue ATS when the PCI config space is disabled. However, if
	 * EATS_TRANS is being changed, then we must have already trusted the
	 * device as the EATS_TRANS security block is being disabled.
	 *
	 * Note: now the EATS_TRANS update is moved to the first entry_set().
	 * Changing S2S and EATS might transiently result in S2S=1 and EATS=1
	 * which is a bad STE (see "5.2 Stream Table Entry"). In such a case,
	 * we can't do a hitless update. Also, it should not be added to the
	 * safe bits with STRTAB_STE_1_EATS_S1CHK, because EATS=0b11 would be
	 * effectively an errant 0b00 configuration.
	 */
	if (!((cur[1] | target[1]) & cpu_to_le64(eats_s1chk)) &&
	    !((cur[2] | target[2]) & cpu_to_le64(STRTAB_STE_2_S2S)))
		safe_bits[1] |= cpu_to_le64(eats_trans);

	/*
	 * MEV does not meaningfully impact the operation of the HW, it only
	 * changes how many fault events are generated, thus we can relax it
	 * when computing the ordering. The spec notes the device can act like
	 * MEV=1 anyhow:
	 *
	 *   Note: Software must expect, and be able to deal with, coalesced
	 *   fault records even when MEV == 0.
	 */
	safe_bits[1] |= cpu_to_le64(STRTAB_STE_1_MEV);
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_update_safe);

/*
 * Figure out if we can do a hitless update of entry to become target. Returns a
 * bit mask where 1 indicates that qword needs to be set disruptively.
@@ -1094,13 +1148,22 @@ static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
{
	__le64 target_used[NUM_ENTRY_QWORDS] = {};
	__le64 cur_used[NUM_ENTRY_QWORDS] = {};
	__le64 safe[NUM_ENTRY_QWORDS] = {};
	u8 used_qword_diff = 0;
	unsigned int i;

	writer->ops->get_used(entry, cur_used);
	writer->ops->get_used(target, target_used);
	if (writer->ops->get_update_safe)
		writer->ops->get_update_safe(entry, target, safe);

	for (i = 0; i != NUM_ENTRY_QWORDS; i++) {
		/*
		 * Safe is only used for bits that are used by both entries,
		 * otherwise it is sequenced according to the unused entry.
		 */
		safe[i] &= target_used[i] & cur_used[i];

		/*
		 * Check that masks are up to date, the make functions are not
		 * allowed to set a bit to 1 if the used function doesn't say it
@@ -1109,6 +1172,7 @@ static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
		WARN_ON_ONCE(target[i] & ~target_used[i]);

		/* Bits can change because they are not currently being used */
		cur_used[i] &= ~safe[i];
		unused_update[i] = (entry[i] & cur_used[i]) |
				   (target[i] & ~cur_used[i]);
		/*
@@ -1121,7 +1185,7 @@ static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
	return used_qword_diff;
}

static bool entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry,
static void entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry,
		      const __le64 *target, unsigned int start,
		      unsigned int len)
{
@@ -1137,7 +1201,6 @@ static bool entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry,

	if (changed)
		writer->ops->sync(writer);
	return changed;
}

/*
@@ -1207,12 +1270,9 @@ void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *entry,
		entry_set(writer, entry, target, 0, 1);
	} else {
		/*
		 * No inuse bit changed. Sanity check that all unused bits are 0
		 * in the entry. The target was already sanity checked by
		 * compute_qword_diff().
		 * No inuse bit changed, though safe bits may have changed.
		 */
		WARN_ON_ONCE(
			entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS));
		entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS);
	}
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_write_entry);
@@ -1543,6 +1603,7 @@ static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer)
static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = {
	.sync = arm_smmu_ste_writer_sync_entry,
	.get_used = arm_smmu_get_ste_used,
	.get_update_safe = arm_smmu_get_ste_update_safe,
};

static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
@@ -2551,7 +2612,7 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
				    ARM_SMMU_FEAT_VAX) ? 52 : 48;

		pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS);
		pgtbl_cfg.oas = smmu->ias;
		pgtbl_cfg.oas = smmu->oas;
		if (enable_dirty)
			pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
		fmt = ARM_64_LPAE_S1;
@ -2561,7 +2622,7 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
|
|||
case ARM_SMMU_DOMAIN_S2:
|
||||
if (enable_dirty)
|
||||
return -EOPNOTSUPP;
|
||||
pgtbl_cfg.ias = smmu->ias;
|
||||
pgtbl_cfg.ias = smmu->oas;
|
||||
pgtbl_cfg.oas = smmu->oas;
|
||||
fmt = ARM_64_LPAE_S2;
|
||||
finalise_stage_fn = arm_smmu_domain_finalise_s2;
|
||||
|
|
@ -3125,7 +3186,8 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master,
|
|||
struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
|
||||
struct arm_smmu_cd *cd, struct iommu_domain *old)
|
||||
{
|
||||
struct iommu_domain *sid_domain = iommu_get_domain_for_dev(master->dev);
|
||||
struct iommu_domain *sid_domain =
|
||||
iommu_driver_get_domain_for_dev(master->dev);
|
||||
struct arm_smmu_attach_state state = {
|
||||
.master = master,
|
||||
.ssid = pasid,
|
||||
|
|
@ -3191,7 +3253,7 @@ static int arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain,
|
|||
*/
|
||||
if (!arm_smmu_ssids_in_use(&master->cd_table)) {
|
||||
struct iommu_domain *sid_domain =
|
||||
iommu_get_domain_for_dev(master->dev);
|
||||
iommu_driver_get_domain_for_dev(master->dev);
|
||||
|
||||
if (sid_domain->type == IOMMU_DOMAIN_IDENTITY ||
|
||||
sid_domain->type == IOMMU_DOMAIN_BLOCKED)
|
||||
|
|
@ -4395,13 +4457,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
|
|||
}
|
||||
|
||||
/* We only support the AArch64 table format at present */
|
||||
switch (FIELD_GET(IDR0_TTF, reg)) {
|
||||
case IDR0_TTF_AARCH32_64:
|
||||
smmu->ias = 40;
|
||||
fallthrough;
|
||||
case IDR0_TTF_AARCH64:
|
||||
break;
|
||||
default:
|
||||
if (!(FIELD_GET(IDR0_TTF, reg) & IDR0_TTF_AARCH64)) {
|
||||
dev_err(smmu->dev, "AArch64 table format not supported!\n");
|
||||
return -ENXIO;
|
||||
}
|
||||
|
|
@ -4514,8 +4570,6 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
|
|||
dev_warn(smmu->dev,
|
||||
"failed to set DMA mask for table walker\n");
|
||||
|
||||
smmu->ias = max(smmu->ias, smmu->oas);
|
||||
|
||||
if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
|
||||
(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
|
||||
smmu->features |= ARM_SMMU_FEAT_NESTING;
|
||||
|
|
@ -4525,11 +4579,40 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
|
|||
if (arm_smmu_sva_supported(smmu))
|
||||
smmu->features |= ARM_SMMU_FEAT_SVA;
|
||||
|
||||
dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
|
||||
smmu->ias, smmu->oas, smmu->features);
|
||||
dev_info(smmu->dev, "oas %lu-bit (features 0x%08x)\n",
|
||||
smmu->oas, smmu->features);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_TEGRA241_CMDQV
|
||||
static void tegra_cmdqv_dt_probe(struct device_node *smmu_node,
|
||||
struct arm_smmu_device *smmu)
|
||||
{
|
||||
struct platform_device *pdev;
|
||||
struct device_node *np;
|
||||
|
||||
np = of_parse_phandle(smmu_node, "nvidia,cmdqv", 0);
|
||||
if (!np)
|
||||
return;
|
||||
|
||||
/* Tegra241 CMDQV driver is responsible for put_device() */
|
||||
pdev = of_find_device_by_node(np);
|
||||
of_node_put(np);
|
||||
if (!pdev)
|
||||
return;
|
||||
|
||||
smmu->impl_dev = &pdev->dev;
|
||||
smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV;
|
||||
dev_dbg(smmu->dev, "found companion CMDQV device: %s\n",
|
||||
dev_name(smmu->impl_dev));
|
||||
}
|
||||
#else
|
||||
static void tegra_cmdqv_dt_probe(struct device_node *smmu_node,
|
||||
struct arm_smmu_device *smmu)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_ACPI
|
||||
#ifdef CONFIG_TEGRA241_CMDQV
|
||||
static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
|
||||
|
|
@ -4542,10 +4625,11 @@ static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
|
|||
adev = acpi_dev_get_first_match_dev("NVDA200C", uid, -1);
|
||||
if (adev) {
|
||||
/* Tegra241 CMDQV driver is responsible for put_device() */
|
||||
smmu->impl_dev = &adev->dev;
|
||||
smmu->impl_dev = get_device(acpi_get_first_physical_node(adev));
|
||||
smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV;
|
||||
dev_info(smmu->dev, "found companion CMDQV device: %s\n",
|
||||
dev_name(smmu->impl_dev));
|
||||
acpi_dev_put(adev);
|
||||
}
|
||||
kfree(uid);
|
||||
}
|
||||
|
|
@ -4634,6 +4718,9 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev,
|
|||
if (of_dma_is_coherent(dev->of_node))
|
||||
smmu->features |= ARM_SMMU_FEAT_COHERENCY;
|
||||
|
||||
if (of_device_is_compatible(dev->of_node, "nvidia,tegra264-smmu"))
|
||||
tegra_cmdqv_dt_probe(dev->of_node, smmu);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -43,7 +43,6 @@ struct arm_vsmmu;
 #define IDR0_COHACC		(1 << 4)
 #define IDR0_TTF		GENMASK(3, 2)
 #define IDR0_TTF_AARCH64	2
-#define IDR0_TTF_AARCH32_64	3
 #define IDR0_S1P		(1 << 1)
 #define IDR0_S2P		(1 << 0)

@@ -784,7 +783,6 @@ struct arm_smmu_device {
 	int				gerr_irq;
 	int				combined_irq;
 
-	unsigned long			ias; /* IPA */
 	unsigned long			oas; /* PA */
 	unsigned long			pgsize_bitmap;

@@ -900,6 +898,8 @@ struct arm_smmu_entry_writer {
 
 struct arm_smmu_entry_writer_ops {
 	void (*get_used)(const __le64 *entry, __le64 *used);
+	void (*get_update_safe)(const __le64 *cur, const __le64 *target,
+				__le64 *safe_bits);
 	void (*sync)(struct arm_smmu_entry_writer *writer);
 };

@@ -911,6 +911,8 @@ void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
 
 #if IS_ENABLED(CONFIG_KUNIT)
 void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits);
+void arm_smmu_get_ste_update_safe(const __le64 *cur, const __le64 *target,
+				  __le64 *safe_bits);
 void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *cur,
 			  const __le64 *target);
 void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits);
@@ -3,17 +3,15 @@
 
 #define dev_fmt(fmt) "tegra241_cmdqv: " fmt
 
-#include <linux/acpi.h>
 #include <linux/debugfs.h>
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
 #include <linux/iommu.h>
 #include <linux/iommufd.h>
 #include <linux/iopoll.h>
+#include <linux/platform_device.h>
 #include <uapi/linux/iommufd.h>
 
-#include <acpi/acpixf.h>
-
 #include "arm-smmu-v3.h"
 
 /* CMDQV register page base and size defines */

@@ -854,69 +852,6 @@ static struct arm_smmu_impl_ops tegra241_cmdqv_impl_ops = {
 
 /* Probe Functions */
 
-static int tegra241_cmdqv_acpi_is_memory(struct acpi_resource *res, void *data)
-{
-	struct resource_win win;
-
-	return !acpi_dev_resource_address_space(res, &win);
-}
-
-static int tegra241_cmdqv_acpi_get_irqs(struct acpi_resource *ares, void *data)
-{
-	struct resource r;
-	int *irq = data;
-
-	if (*irq <= 0 && acpi_dev_resource_interrupt(ares, 0, &r))
-		*irq = r.start;
-	return 1; /* No need to add resource to the list */
-}
-
-static struct resource *
-tegra241_cmdqv_find_acpi_resource(struct device *dev, int *irq)
-{
-	struct acpi_device *adev = to_acpi_device(dev);
-	struct list_head resource_list;
-	struct resource_entry *rentry;
-	struct resource *res = NULL;
-	int ret;
-
-	INIT_LIST_HEAD(&resource_list);
-	ret = acpi_dev_get_resources(adev, &resource_list,
-				     tegra241_cmdqv_acpi_is_memory, NULL);
-	if (ret < 0) {
-		dev_err(dev, "failed to get memory resource: %d\n", ret);
-		return NULL;
-	}
-
-	rentry = list_first_entry_or_null(&resource_list,
-					  struct resource_entry, node);
-	if (!rentry) {
-		dev_err(dev, "failed to get memory resource entry\n");
-		goto free_list;
-	}
-
-	/* Caller must free the res */
-	res = kzalloc(sizeof(*res), GFP_KERNEL);
-	if (!res)
-		goto free_list;
-
-	*res = *rentry->res;
-
-	acpi_dev_free_resource_list(&resource_list);
-
-	INIT_LIST_HEAD(&resource_list);
-
-	if (irq)
-		ret = acpi_dev_get_resources(adev, &resource_list,
-					     tegra241_cmdqv_acpi_get_irqs, irq);
-	if (ret < 0 || !irq || *irq <= 0)
-		dev_warn(dev, "no interrupt. errors will not be reported\n");
-
-free_list:
-	acpi_dev_free_resource_list(&resource_list);
-	return res;
-}
-
 static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu)
 {
 	struct tegra241_cmdqv *cmdqv =

@@ -1042,18 +977,23 @@ iounmap:
 
 struct arm_smmu_device *tegra241_cmdqv_probe(struct arm_smmu_device *smmu)
 {
+	struct platform_device *pdev = to_platform_device(smmu->impl_dev);
 	struct arm_smmu_device *new_smmu;
-	struct resource *res = NULL;
+	struct resource *res;
 	int irq;
 
-	if (!smmu->dev->of_node)
-		res = tegra241_cmdqv_find_acpi_resource(smmu->impl_dev, &irq);
-	if (!res)
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "no memory resource found for CMDQV\n");
 		goto out_fallback;
+	}
+
+	irq = platform_get_irq_optional(pdev, 0);
+	if (irq <= 0)
+		dev_warn(&pdev->dev,
+			 "no interrupt. errors will not be reported\n");
 
 	new_smmu = __tegra241_cmdqv_probe(smmu, res, irq);
-	kfree(res);
 
 	if (new_smmu)
 		return new_smmu;
@@ -41,12 +41,40 @@ static const struct of_device_id qcom_smmu_actlr_client_of_match[] = {
 	  .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) },
 	{ .compatible = "qcom,fastrpc",
 	  .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) },
 	{ .compatible = "qcom,qcm2290-mdss",
 	  .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) },
+	{ .compatible = "qcom,sa8775p-mdss",
+	  .data = (const void *) (PREFETCH_DEFAULT | CMTLB) },
 	{ .compatible = "qcom,sc7280-mdss",
 	  .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) },
 	{ .compatible = "qcom,sc7280-venus",
 	  .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) },
+	{ .compatible = "qcom,sc8180x-mdss",
+	  .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) },
+	{ .compatible = "qcom,sc8280xp-mdss",
+	  .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) },
+	{ .compatible = "qcom,sm6115-mdss",
+	  .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) },
+	{ .compatible = "qcom,sm6125-mdss",
+	  .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) },
+	{ .compatible = "qcom,sm6350-mdss",
+	  .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) },
+	{ .compatible = "qcom,sm8150-mdss",
+	  .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) },
+	{ .compatible = "qcom,sm8250-mdss",
+	  .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) },
+	{ .compatible = "qcom,sm8350-mdss",
+	  .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) },
+	{ .compatible = "qcom,sm8450-mdss",
+	  .data = (const void *) (PREFETCH_DEFAULT | CMTLB) },
+	{ .compatible = "qcom,sm8550-mdss",
+	  .data = (const void *) (PREFETCH_DEFAULT | CMTLB) },
+	{ .compatible = "qcom,sm8650-mdss",
+	  .data = (const void *) (PREFETCH_DEFAULT | CMTLB) },
+	{ .compatible = "qcom,sm8750-mdss",
+	  .data = (const void *) (PREFETCH_DEFAULT | CMTLB) },
+	{ .compatible = "qcom,x1e80100-mdss",
+	  .data = (const void *) (PREFETCH_DEFAULT | CMTLB) },
 	{ }
 };
@@ -761,14 +761,10 @@ static struct platform_driver qcom_iommu_ctx_driver = {
 
 static bool qcom_iommu_has_secure_context(struct qcom_iommu_dev *qcom_iommu)
 {
-	struct device_node *child;
-
-	for_each_child_of_node(qcom_iommu->dev->of_node, child) {
+	for_each_child_of_node_scoped(qcom_iommu->dev->of_node, child) {
 		if (of_device_is_compatible(child, "qcom,msm-iommu-v1-sec") ||
-		    of_device_is_compatible(child, "qcom,msm-iommu-v2-sec")) {
-			of_node_put(child);
+		    of_device_is_compatible(child, "qcom,msm-iommu-v2-sec"))
 			return true;
-		}
 	}
 
 	return false;
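For reference, a hedged sketch of the pattern this hunk adopts: for_each_child_of_node_scoped() attaches a cleanup action to the iterator variable it declares, so every exit path, including the early return, drops the child reference without an explicit of_node_put(). The helper name below is illustrative, not part of the patch:

    #include <linux/of.h>

    /* Illustrative only: returns true if any child matches 'compat'. */
    static bool any_child_compatible(struct device_node *parent, const char *compat)
    {
        for_each_child_of_node_scoped(parent, child) {
            if (of_device_is_compatible(child, compat))
                return true;    /* child ref dropped automatically */
        }
        return false;
    }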
@@ -2097,10 +2097,8 @@ void dma_iova_destroy(struct device *dev, struct dma_iova_state *state,
 }
 EXPORT_SYMBOL_GPL(dma_iova_destroy);
 
-void iommu_setup_dma_ops(struct device *dev)
+void iommu_setup_dma_ops(struct device *dev, struct iommu_domain *domain)
 {
-	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
-
 	if (dev_is_pci(dev))
 		dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac;
 
@@ -9,7 +9,7 @@
 
 #ifdef CONFIG_IOMMU_DMA
 
-void iommu_setup_dma_ops(struct device *dev);
+void iommu_setup_dma_ops(struct device *dev, struct iommu_domain *domain);
 
 int iommu_get_dma_cookie(struct iommu_domain *domain);
 void iommu_put_dma_cookie(struct iommu_domain *domain);

@@ -26,7 +26,8 @@ extern bool iommu_dma_forcedac;
 
 #else /* CONFIG_IOMMU_DMA */
 
-static inline void iommu_setup_dma_ops(struct device *dev)
+static inline void iommu_setup_dma_ops(struct device *dev,
+				       struct iommu_domain *domain)
 {
 }
 
@@ -354,7 +354,8 @@ static inline int amdv1pt_iommu_set_prot(struct pt_common *common,
 	 * Ideally we'd have an IOMMU_ENCRYPTED flag set by higher levels to
 	 * control this. For now if the tables use sme_set then so do the ptes.
 	 */
-	if (pt_feature(common, PT_FEAT_AMDV1_ENCRYPT_TABLES))
+	if (pt_feature(common, PT_FEAT_AMDV1_ENCRYPT_TABLES) &&
+	    !(iommu_prot & IOMMU_MMIO))
 		pte = __sme_set(pte);
 
 	attrs->descriptor_bits = pte;

@@ -227,7 +227,8 @@ static inline int x86_64_pt_iommu_set_prot(struct pt_common *common,
 	 * Ideally we'd have an IOMMU_ENCRYPTED flag set by higher levels to
 	 * control this. For now if the tables use sme_set then so do the ptes.
 	 */
-	if (pt_feature(common, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES))
+	if (pt_feature(common, PT_FEAT_X86_64_AMD_ENCRYPT_TABLES) &&
+	    !(iommu_prot & IOMMU_MMIO))
 		pte = __sme_set(pte);
 
 	attrs->descriptor_bits = pte;
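Both hunks encode the same rule: the SME encryption bit only makes sense for RAM-backed mappings, so a PTE that targets device MMIO must be left unencrypted. A distilled sketch of the check, assuming __sme_set() simply ORs the platform's encryption mask into the PTE:

    /* Sketch: apply the C-bit only to RAM-backed, non-MMIO mappings. */
    static u64 pte_maybe_encrypt(u64 pte, unsigned int iommu_prot,
                                 bool tables_encrypted)
    {
        if (tables_encrypted && !(iommu_prot & IOMMU_MMIO))
            pte = __sme_set(pte);
        return pte;
    }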
@@ -58,10 +58,9 @@ static void gather_range_pages(struct iommu_iotlb_gather *iotlb_gather,
 	 * Note that the sync frees the gather's free list, so we must
 	 * not have any pages on that list that are covered by iova/len
 	 */
-	} else if (pt_feature(common, PT_FEAT_FLUSH_RANGE)) {
-		iommu_iotlb_gather_add_range(iotlb_gather, iova, len);
 	}
 
+	iommu_iotlb_gather_add_range(iotlb_gather, iova, len);
 	iommu_pages_list_splice(free_list, &iotlb_gather->freelist);
 }
@@ -363,6 +363,13 @@ static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16
 	qi_batch_increment_index(iommu, batch);
 }
 
+static bool intel_domain_use_piotlb(struct dmar_domain *domain)
+{
+	return domain->domain.type == IOMMU_DOMAIN_SVA ||
+	       domain->domain.type == IOMMU_DOMAIN_NESTED ||
+	       intel_domain_is_fs_paging(domain);
+}
+
 static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag,
 				  unsigned long addr, unsigned long pages,
 				  unsigned long mask, int ih)

@@ -370,7 +377,7 @@ static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *
 	struct intel_iommu *iommu = tag->iommu;
 	u64 type = DMA_TLB_PSI_FLUSH;
 
-	if (intel_domain_is_fs_paging(domain)) {
+	if (intel_domain_use_piotlb(domain)) {
 		qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr,
 				    pages, ih, domain->qi_batch);
 		return;
@@ -1240,22 +1240,22 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
 	}
 
 	did = context_domain_id(context);
-	context_clear_entry(context);
+	context_clear_present(context);
 	__iommu_flush_cache(iommu, context, sizeof(*context));
 	spin_unlock(&iommu->lock);
 	intel_context_flush_no_pasid(info, context, did);
+	context_clear_entry(context);
+	__iommu_flush_cache(iommu, context, sizeof(*context));
 }
 
 int __domain_setup_first_level(struct intel_iommu *iommu, struct device *dev,
 			       ioasid_t pasid, u16 did, phys_addr_t fsptptr,
 			       int flags, struct iommu_domain *old)
 {
-	if (!old)
-		return intel_pasid_setup_first_level(iommu, dev, fsptptr, pasid,
-						     did, flags);
-	return intel_pasid_replace_first_level(iommu, dev, fsptptr, pasid, did,
-					       iommu_domain_did(old, iommu),
-					       flags);
+	if (old)
+		intel_pasid_tear_down_entry(iommu, dev, pasid, false);
+
+	return intel_pasid_setup_first_level(iommu, dev, fsptptr, pasid, did, flags);
 }
 
 static int domain_setup_second_level(struct intel_iommu *iommu,
@@ -1263,23 +1263,20 @@ static int domain_setup_second_level(struct intel_iommu *iommu,
 				     struct device *dev, ioasid_t pasid,
 				     struct iommu_domain *old)
 {
-	if (!old)
-		return intel_pasid_setup_second_level(iommu, domain,
-						      dev, pasid);
-	return intel_pasid_replace_second_level(iommu, domain, dev,
-						iommu_domain_did(old, iommu),
-						pasid);
+	if (old)
+		intel_pasid_tear_down_entry(iommu, dev, pasid, false);
+
+	return intel_pasid_setup_second_level(iommu, domain, dev, pasid);
 }
 
 static int domain_setup_passthrough(struct intel_iommu *iommu,
 				    struct device *dev, ioasid_t pasid,
 				    struct iommu_domain *old)
 {
-	if (!old)
-		return intel_pasid_setup_pass_through(iommu, dev, pasid);
-	return intel_pasid_replace_pass_through(iommu, dev,
-						iommu_domain_did(old, iommu),
-						pasid);
+	if (old)
+		intel_pasid_tear_down_entry(iommu, dev, pasid, false);
+
+	return intel_pasid_setup_pass_through(iommu, dev, pasid);
 }
 
 static int domain_setup_first_level(struct intel_iommu *iommu,
@@ -900,7 +900,26 @@ static inline int pfn_level_offset(u64 pfn, int level)
 
 static inline void context_set_present(struct context_entry *context)
 {
-	context->lo |= 1;
+	u64 val;
+
+	dma_wmb();
+	val = READ_ONCE(context->lo) | 1;
+	WRITE_ONCE(context->lo, val);
 }
 
+/*
+ * Clear the Present (P) bit (bit 0) of a context table entry. This initiates
+ * the transition of the entry's ownership from hardware to software. The
+ * caller is responsible for fulfilling the invalidation handshake recommended
+ * by the VT-d spec, Section 6.5.3.3 (Guidance to Software for Invalidations).
+ */
+static inline void context_clear_present(struct context_entry *context)
+{
+	u64 val;
+
+	val = READ_ONCE(context->lo) & GENMASK_ULL(63, 1);
+	WRITE_ONCE(context->lo, val);
+	dma_wmb();
+}
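The two helpers bracket a hardware-visible entry with write barriers on opposite sides: publish only after the payload is visible, and make the clear visible before the invalidation that follows. A userspace sketch of the same discipline, with a C11 release fence standing in for dma_wmb():

    #include <stdatomic.h>
    #include <stdint.h>

    struct hw_entry { _Atomic uint64_t lo; uint64_t hi; };

    static void entry_publish(struct hw_entry *e, uint64_t payload)
    {
        e->hi = payload;                             /* fill fields first */
        atomic_thread_fence(memory_order_release);   /* ~dma_wmb()        */
        atomic_fetch_or(&e->lo, 1);                  /* then set Present  */
    }

    static void entry_unpublish(struct hw_entry *e)
    {
        atomic_fetch_and(&e->lo, ~(uint64_t)1);      /* clear Present     */
        atomic_thread_fence(memory_order_release);   /* order before the
                                                        invalidation step */
    }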
 static inline void context_set_fault_enable(struct context_entry *context)

@@ -136,11 +136,10 @@ static int domain_setup_nested(struct intel_iommu *iommu,
 			       struct device *dev, ioasid_t pasid,
 			       struct iommu_domain *old)
 {
-	if (!old)
-		return intel_pasid_setup_nested(iommu, dev, pasid, domain);
-	return intel_pasid_replace_nested(iommu, dev, pasid,
-					  iommu_domain_did(old, iommu),
-					  domain);
+	if (old)
+		intel_pasid_tear_down_entry(iommu, dev, pasid, false);
+
+	return intel_pasid_setup_nested(iommu, dev, pasid, domain);
 }
 
 static int intel_nested_set_dev_pasid(struct iommu_domain *domain,

@@ -153,6 +153,9 @@ retry:
 	if (!entries)
 		return NULL;
 
+	if (!ecap_coherent(info->iommu->ecap))
+		clflush_cache_range(entries, VTD_PAGE_SIZE);
+
 	/*
 	 * The pasid directory table entry won't be freed after
 	 * allocation. No worry about the race with free and

@@ -165,10 +168,8 @@ retry:
 			iommu_free_pages(entries);
 			goto retry;
 		}
-		if (!ecap_coherent(info->iommu->ecap)) {
-			clflush_cache_range(entries, VTD_PAGE_SIZE);
+		if (!ecap_coherent(info->iommu->ecap))
 			clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
-		}
 	}
 
 	return &entries[index];

@@ -218,7 +219,7 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
 	if (!info || !info->ats_enabled)
 		return;
 
-	if (pci_dev_is_disconnected(to_pci_dev(dev)))
+	if (!pci_device_is_present(to_pci_dev(dev)))
 		return;
 
 	sid = PCI_DEVID(info->bus, info->devfn);

@@ -272,7 +273,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
 
 	did = pasid_get_domain_id(pte);
 	pgtt = pasid_pte_get_pgtt(pte);
-	intel_pasid_clear_entry(dev, pasid, fault_ignore);
+	pasid_clear_present(pte);
 	spin_unlock(&iommu->lock);
 
 	if (!ecap_coherent(iommu->ecap))

@@ -286,6 +287,10 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
 		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
 
 	devtlb_invalidation_with_pasid(iommu, dev, pasid);
+	intel_pasid_clear_entry(dev, pasid, fault_ignore);
+	if (!ecap_coherent(iommu->ecap))
+		clflush_cache_range(pte, sizeof(*pte));
+
 	if (!fault_ignore)
 		intel_iommu_drain_pasid_prq(dev, pasid);
 }

@@ -412,50 +417,6 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, struct device *dev,
 	return 0;
 }
 
-int intel_pasid_replace_first_level(struct intel_iommu *iommu,
-				    struct device *dev, phys_addr_t fsptptr,
-				    u32 pasid, u16 did, u16 old_did,
-				    int flags)
-{
-	struct pasid_entry *pte, new_pte;
-
-	if (!ecap_flts(iommu->ecap)) {
-		pr_err("No first level translation support on %s\n",
-		       iommu->name);
-		return -EINVAL;
-	}
-
-	if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
-		pr_err("No 5-level paging support for first-level on %s\n",
-		       iommu->name);
-		return -EINVAL;
-	}
-
-	pasid_pte_config_first_level(iommu, &new_pte, fsptptr, did, flags);
-
-	spin_lock(&iommu->lock);
-	pte = intel_pasid_get_entry(dev, pasid);
-	if (!pte) {
-		spin_unlock(&iommu->lock);
-		return -ENODEV;
-	}
-
-	if (!pasid_pte_is_present(pte)) {
-		spin_unlock(&iommu->lock);
-		return -EINVAL;
-	}
-
-	WARN_ON(old_did != pasid_get_domain_id(pte));
-
-	*pte = new_pte;
-	spin_unlock(&iommu->lock);
-
-	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
-	intel_iommu_drain_pasid_prq(dev, pasid);
-
-	return 0;
-}
-
 /*
  * Set up the scalable mode pasid entry for second only translation type.
  */

@@ -522,51 +483,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
 	return 0;
 }
 
-int intel_pasid_replace_second_level(struct intel_iommu *iommu,
-				     struct dmar_domain *domain,
-				     struct device *dev, u16 old_did,
-				     u32 pasid)
-{
-	struct pasid_entry *pte, new_pte;
-	u16 did;
-
-	/*
-	 * If hardware advertises no support for second level
-	 * translation, return directly.
-	 */
-	if (!ecap_slts(iommu->ecap)) {
-		pr_err("No second level translation support on %s\n",
-		       iommu->name);
-		return -EINVAL;
-	}
-
-	did = domain_id_iommu(domain, iommu);
-
-	pasid_pte_config_second_level(iommu, &new_pte, domain, did);
-
-	spin_lock(&iommu->lock);
-	pte = intel_pasid_get_entry(dev, pasid);
-	if (!pte) {
-		spin_unlock(&iommu->lock);
-		return -ENODEV;
-	}
-
-	if (!pasid_pte_is_present(pte)) {
-		spin_unlock(&iommu->lock);
-		return -EINVAL;
-	}
-
-	WARN_ON(old_did != pasid_get_domain_id(pte));
-
-	*pte = new_pte;
-	spin_unlock(&iommu->lock);
-
-	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
-	intel_iommu_drain_pasid_prq(dev, pasid);
-
-	return 0;
-}
-
 /*
  * Set up dirty tracking on a second only or nested translation type.
 */

@@ -679,38 +595,6 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
 	return 0;
 }
 
-int intel_pasid_replace_pass_through(struct intel_iommu *iommu,
-				     struct device *dev, u16 old_did,
-				     u32 pasid)
-{
-	struct pasid_entry *pte, new_pte;
-	u16 did = FLPT_DEFAULT_DID;
-
-	pasid_pte_config_pass_through(iommu, &new_pte, did);
-
-	spin_lock(&iommu->lock);
-	pte = intel_pasid_get_entry(dev, pasid);
-	if (!pte) {
-		spin_unlock(&iommu->lock);
-		return -ENODEV;
-	}
-
-	if (!pasid_pte_is_present(pte)) {
-		spin_unlock(&iommu->lock);
-		return -EINVAL;
-	}
-
-	WARN_ON(old_did != pasid_get_domain_id(pte));
-
-	*pte = new_pte;
-	spin_unlock(&iommu->lock);
-
-	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
-	intel_iommu_drain_pasid_prq(dev, pasid);
-
-	return 0;
-}
-
 /*
  * Set the page snoop control for a pasid entry which has been set up.
 */

@@ -844,69 +728,6 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
 	return 0;
 }
 
-int intel_pasid_replace_nested(struct intel_iommu *iommu,
-			       struct device *dev, u32 pasid,
-			       u16 old_did, struct dmar_domain *domain)
-{
-	struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
-	struct dmar_domain *s2_domain = domain->s2_domain;
-	u16 did = domain_id_iommu(domain, iommu);
-	struct pasid_entry *pte, new_pte;
-
-	/* Address width should match the address width supported by hardware */
-	switch (s1_cfg->addr_width) {
-	case ADDR_WIDTH_4LEVEL:
-		break;
-	case ADDR_WIDTH_5LEVEL:
-		if (!cap_fl5lp_support(iommu->cap)) {
-			dev_err_ratelimited(dev,
-					    "5-level paging not supported\n");
-			return -EINVAL;
-		}
-		break;
-	default:
-		dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
-				    s1_cfg->addr_width);
-		return -EINVAL;
-	}
-
-	if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
-		pr_err_ratelimited("No supervisor request support on %s\n",
-				   iommu->name);
-		return -EINVAL;
-	}
-
-	if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
-		pr_err_ratelimited("No extended access flag support on %s\n",
-				   iommu->name);
-		return -EINVAL;
-	}
-
-	pasid_pte_config_nestd(iommu, &new_pte, s1_cfg, s2_domain, did);
-
-	spin_lock(&iommu->lock);
-	pte = intel_pasid_get_entry(dev, pasid);
-	if (!pte) {
-		spin_unlock(&iommu->lock);
-		return -ENODEV;
-	}
-
-	if (!pasid_pte_is_present(pte)) {
-		spin_unlock(&iommu->lock);
-		return -EINVAL;
-	}
-
-	WARN_ON(old_did != pasid_get_domain_id(pte));
-
-	*pte = new_pte;
-	spin_unlock(&iommu->lock);
-
-	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
-	intel_iommu_drain_pasid_prq(dev, pasid);
-
-	return 0;
-}
-
 /*
  * Interfaces to setup or teardown a pasid table to the scalable-mode
  * context table entry:

@@ -1019,7 +840,7 @@ static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn)
 	}
 
 	if (context_copied(iommu, bus, devfn)) {
-		context_clear_entry(context);
+		context_clear_present(context);
 		__iommu_flush_cache(iommu, context, sizeof(*context));
 
 		/*

@@ -1039,6 +860,9 @@ static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn)
 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
 		devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);
 
+		context_clear_entry(context);
+		__iommu_flush_cache(iommu, context, sizeof(*context));
+
 		/*
 		 * At this point, the device is supposed to finish reset at
 		 * its driver probe stage, so no in-flight DMA will exist,

@@ -1102,6 +926,14 @@ static void __context_flush_dev_iotlb(struct device_domain_info *info)
 	if (!info->ats_enabled)
 		return;
 
+	/*
+	 * Skip dev-IOTLB flush for inaccessible PCIe devices to prevent the
+	 * Intel IOMMU from waiting indefinitely for an ATS invalidation that
+	 * cannot complete.
+	 */
+	if (!pci_device_is_present(to_pci_dev(info->dev)))
+		return;
+
 	qi_flush_dev_iotlb(info->iommu, PCI_DEVID(info->bus, info->devfn),
 			   info->pfsid, info->ats_qdep, 0, MAX_AGAW_PFN_WIDTH);

@@ -234,9 +234,23 @@ static inline void pasid_set_wpe(struct pasid_entry *pe)
  */
 static inline void pasid_set_present(struct pasid_entry *pe)
 {
+	dma_wmb();
 	pasid_set_bits(&pe->val[0], 1 << 0, 1);
 }
 
+/*
+ * Clear the Present (P) bit (bit 0) of a scalable-mode PASID table entry.
+ * This initiates the transition of the entry's ownership from hardware
+ * to software. The caller is responsible for fulfilling the invalidation
+ * handshake recommended by the VT-d spec, Section 6.5.3.3 (Guidance to
+ * Software for Invalidations).
+ */
+static inline void pasid_clear_present(struct pasid_entry *pe)
+{
+	pasid_set_bits(&pe->val[0], 1 << 0, 0);
+	dma_wmb();
+}
+
 /*
  * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID
  * entry.

@@ -302,20 +316,6 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
 				   struct device *dev, u32 pasid);
 int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
 			     u32 pasid, struct dmar_domain *domain);
-int intel_pasid_replace_first_level(struct intel_iommu *iommu,
-				    struct device *dev, phys_addr_t fsptptr,
-				    u32 pasid, u16 did, u16 old_did, int flags);
-int intel_pasid_replace_second_level(struct intel_iommu *iommu,
-				     struct dmar_domain *domain,
-				     struct device *dev, u16 old_did,
-				     u32 pasid);
-int intel_pasid_replace_pass_through(struct intel_iommu *iommu,
-				     struct device *dev, u16 old_did,
-				     u32 pasid);
-int intel_pasid_replace_nested(struct intel_iommu *iommu,
-			       struct device *dev, u32 pasid,
-			       u16 old_did, struct dmar_domain *domain);
 
 void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
 				 struct device *dev, u32 pasid,
 				 bool fault_ignore);
drivers/iommu/iommu-debug-pagealloc.c (new file, 164 lines)
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 - Google Inc
+ * Author: Mostafa Saleh <smostafa@google.com>
+ * IOMMU API debug page alloc sanitizer
+ */
+#include <linux/atomic.h>
+#include <linux/iommu.h>
+#include <linux/iommu-debug-pagealloc.h>
+#include <linux/kernel.h>
+#include <linux/page_ext.h>
+#include <linux/page_owner.h>
+
+#include "iommu-priv.h"
+
+static bool needed;
+DEFINE_STATIC_KEY_FALSE(iommu_debug_initialized);
+
+struct iommu_debug_metadata {
+	atomic_t ref;
+};
+
+static __init bool need_iommu_debug(void)
+{
+	return needed;
+}
+
+struct page_ext_operations page_iommu_debug_ops = {
+	.size = sizeof(struct iommu_debug_metadata),
+	.need = need_iommu_debug,
+};
+
+static struct iommu_debug_metadata *get_iommu_data(struct page_ext *page_ext)
+{
+	return page_ext_data(page_ext, &page_iommu_debug_ops);
+}
+
+static void iommu_debug_inc_page(phys_addr_t phys)
+{
+	struct page_ext *page_ext = page_ext_from_phys(phys);
+	struct iommu_debug_metadata *d;
+
+	if (!page_ext)
+		return;
+
+	d = get_iommu_data(page_ext);
+	WARN_ON(atomic_inc_return_relaxed(&d->ref) <= 0);
+	page_ext_put(page_ext);
+}
+
+static void iommu_debug_dec_page(phys_addr_t phys)
+{
+	struct page_ext *page_ext = page_ext_from_phys(phys);
+	struct iommu_debug_metadata *d;
+
+	if (!page_ext)
+		return;
+
+	d = get_iommu_data(page_ext);
+	WARN_ON(atomic_dec_return_relaxed(&d->ref) < 0);
+	page_ext_put(page_ext);
+}
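The inc/dec pair above asserts that the per-page counter never wraps in either direction: an increment must never observe a non-positive result and a decrement must never go negative, otherwise the map/unmap accounting is already corrupted. A self-contained userspace analogue of that discipline:

    #include <assert.h>
    #include <stdatomic.h>

    static _Atomic int ref;  /* per-page mapping count, as in the metadata */

    static void map_page(void)   { assert(atomic_fetch_add(&ref, 1) + 1 > 0); }
    static void unmap_page(void) { assert(atomic_fetch_sub(&ref, 1) - 1 >= 0); }

    int main(void)
    {
        map_page();     /* two mappings of the same page are fine */
        map_page();
        unmap_page();
        unmap_page();
        /* a third unmap_page() here would trip the assert */
        return 0;
    }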
+/*
+ * The IOMMU page size doesn't have to match the CPU page size. So, we use
+ * the smallest IOMMU page size to refcount the pages in the vmemmap.
+ * That is important as both map and unmap have to use the same page size
+ * to update the refcount, to avoid double counting the same page.
+ * And as we can't know from iommu_unmap() what the original page size
+ * used for the map was, we just use the minimum supported one for both.
+ */
+static size_t iommu_debug_page_size(struct iommu_domain *domain)
+{
+	return 1UL << __ffs(domain->pgsize_bitmap);
+}
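__ffs() returns the index of the lowest set bit, so the function above picks the smallest granule the domain supports. A userspace check of the same computation, using the compiler builtin in place of the kernel's __ffs():

    #include <stdio.h>

    int main(void)
    {
        /* e.g. a domain supporting 4K, 2M and 1G pages */
        unsigned long pgsize_bitmap = (1UL << 12) | (1UL << 21) | (1UL << 30);

        printf("%lu\n", 1UL << __builtin_ctzl(pgsize_bitmap)); /* prints 4096 */
        return 0;
    }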
+static bool iommu_debug_page_count(const struct page *page)
+{
+	unsigned int ref;
+	struct page_ext *page_ext = page_ext_get(page);
+	struct iommu_debug_metadata *d = get_iommu_data(page_ext);
+
+	ref = atomic_read(&d->ref);
+	page_ext_put(page_ext);
+	return ref != 0;
+}
+
+void __iommu_debug_check_unmapped(const struct page *page, int numpages)
+{
+	while (numpages--) {
+		if (WARN_ON(iommu_debug_page_count(page))) {
+			pr_warn("iommu: Detected page leak!\n");
+			dump_page_owner(page);
+		}
+		page++;
+	}
+}
+
+void __iommu_debug_map(struct iommu_domain *domain, phys_addr_t phys, size_t size)
+{
+	size_t off, end;
+	size_t page_size = iommu_debug_page_size(domain);
+
+	if (WARN_ON(!phys || check_add_overflow(phys, size, &end)))
+		return;
+
+	for (off = 0 ; off < size ; off += page_size)
+		iommu_debug_inc_page(phys + off);
+}
+
+static void __iommu_debug_update_iova(struct iommu_domain *domain,
+				      unsigned long iova, size_t size, bool inc)
+{
+	size_t off, end;
+	size_t page_size = iommu_debug_page_size(domain);
+
+	if (WARN_ON(check_add_overflow(iova, size, &end)))
+		return;
+
+	for (off = 0 ; off < size ; off += page_size) {
+		phys_addr_t phys = iommu_iova_to_phys(domain, iova + off);
+
+		if (!phys)
+			continue;
+
+		if (inc)
+			iommu_debug_inc_page(phys);
+		else
+			iommu_debug_dec_page(phys);
+	}
+}
+
+void __iommu_debug_unmap_begin(struct iommu_domain *domain,
+			       unsigned long iova, size_t size)
+{
+	__iommu_debug_update_iova(domain, iova, size, false);
+}
+
+void __iommu_debug_unmap_end(struct iommu_domain *domain,
+			     unsigned long iova, size_t size,
+			     size_t unmapped)
+{
+	if ((unmapped == size) || WARN_ON_ONCE(unmapped > size))
+		return;
+
+	/* If unmap failed, re-increment the refcount. */
+	__iommu_debug_update_iova(domain, iova + unmapped,
+				  size - unmapped, true);
+}
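So on a partial failure only the tail that never got unmapped is re-counted. A small arithmetic sketch of the window that __iommu_debug_update_iova() walks in that case:

    #include <stdio.h>

    int main(void)
    {
        unsigned long iova = 0x100000;
        unsigned long size = 0x4000, unmapped = 0x1000;

        if (unmapped < size)    /* partial unmap: the tail is still mapped */
            printf("re-increment [%#lx, %#lx)\n",
                   iova + unmapped, iova + size);
        return 0;
    }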
+void iommu_debug_init(void)
+{
+	if (!needed)
+		return;
+
+	pr_info("iommu: Debugging page allocations, expect overhead or disable iommu.debug_pagealloc");
+	static_branch_enable(&iommu_debug_initialized);
+}
+
+static int __init iommu_debug_pagealloc(char *str)
+{
+	return kstrtobool(str, &needed);
+}
+early_param("iommu.debug_pagealloc", iommu_debug_pagealloc);
@@ -5,6 +5,7 @@
 #define __LINUX_IOMMU_PRIV_H
 
 #include <linux/iommu.h>
+#include <linux/iommu-debug-pagealloc.h>
 #include <linux/msi.h>
 
 static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)

@@ -65,4 +66,61 @@ static inline int iommufd_sw_msi(struct iommu_domain *domain,
 int iommu_replace_device_pasid(struct iommu_domain *domain,
 			       struct device *dev, ioasid_t pasid,
 			       struct iommu_attach_handle *handle);
+
+#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
+
+void __iommu_debug_map(struct iommu_domain *domain, phys_addr_t phys,
+		       size_t size);
+void __iommu_debug_unmap_begin(struct iommu_domain *domain,
+			       unsigned long iova, size_t size);
+void __iommu_debug_unmap_end(struct iommu_domain *domain,
+			     unsigned long iova, size_t size, size_t unmapped);
+
+static inline void iommu_debug_map(struct iommu_domain *domain,
+				   phys_addr_t phys, size_t size)
+{
+	if (static_branch_unlikely(&iommu_debug_initialized))
+		__iommu_debug_map(domain, phys, size);
+}
+
+static inline void iommu_debug_unmap_begin(struct iommu_domain *domain,
+					   unsigned long iova, size_t size)
+{
+	if (static_branch_unlikely(&iommu_debug_initialized))
+		__iommu_debug_unmap_begin(domain, iova, size);
+}
+
+static inline void iommu_debug_unmap_end(struct iommu_domain *domain,
+					 unsigned long iova, size_t size,
+					 size_t unmapped)
+{
+	if (static_branch_unlikely(&iommu_debug_initialized))
+		__iommu_debug_unmap_end(domain, iova, size, unmapped);
+}
+
+void iommu_debug_init(void);
+
+#else
+static inline void iommu_debug_map(struct iommu_domain *domain,
+				   phys_addr_t phys, size_t size)
+{
+}
+
+static inline void iommu_debug_unmap_begin(struct iommu_domain *domain,
+					   unsigned long iova, size_t size)
+{
+}
+
+static inline void iommu_debug_unmap_end(struct iommu_domain *domain,
+					 unsigned long iova, size_t size,
+					 size_t unmapped)
+{
+}
+
+static inline void iommu_debug_init(void)
+{
+}
+
+#endif /* CONFIG_IOMMU_DEBUG_PAGEALLOC */
+
 #endif /* __LINUX_IOMMU_PRIV_H */
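The wrappers above follow the usual static-key recipe, so a kernel built with the option pays only a single patched branch on the map/unmap fast paths until the key is flipped at boot. A generic sketch of that pattern, with made-up names:

    #include <linux/jump_label.h>
    #include <linux/printk.h>

    DEFINE_STATIC_KEY_FALSE(example_key);

    static inline void example_hook(void)
    {
        if (static_branch_unlikely(&example_key))
            pr_info("slow path taken\n");   /* off by default */
    }

    static void example_enable(void)
    {
        static_branch_enable(&example_key); /* patches the branch live */
    }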
@@ -61,6 +61,11 @@ struct iommu_group {
 	int id;
 	struct iommu_domain *default_domain;
 	struct iommu_domain *blocking_domain;
+	/*
+	 * During a group device reset, @resetting_domain points to the physical
+	 * domain, while @domain points to the attached domain before the reset.
+	 */
+	struct iommu_domain *resetting_domain;
 	struct iommu_domain *domain;
 	struct list_head entry;
 	unsigned int owner_cnt;

@@ -232,6 +237,8 @@ static int __init iommu_subsys_init(void)
 	if (!nb)
 		return -ENOMEM;
 
+	iommu_debug_init();
+
 	for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
 		nb[i].notifier_call = iommu_bus_notifier;
 		bus_register_notifier(iommu_buses[i], &nb[i]);

@@ -661,7 +668,7 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
 	}
 
 	if (group->default_domain)
-		iommu_setup_dma_ops(dev);
+		iommu_setup_dma_ops(dev, group->default_domain);
 
 	mutex_unlock(&group->mutex);

@@ -1173,12 +1180,11 @@ static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
 					       struct device *dev)
 {
 	struct iommu_resv_region *entry;
-	struct list_head mappings;
+	LIST_HEAD(mappings);
 	unsigned long pg_size;
 	int ret = 0;
 
 	pg_size = domain->pgsize_bitmap ? 1UL << __ffs(domain->pgsize_bitmap) : 0;
-	INIT_LIST_HEAD(&mappings);
 
 	if (WARN_ON_ONCE(iommu_is_dma_domain(domain) && !pg_size))
 		return -EINVAL;

@@ -1949,7 +1955,7 @@ static int bus_iommu_probe(const struct bus_type *bus)
 		return ret;
 	}
 	for_each_group_device(group, gdev)
-		iommu_setup_dma_ops(gdev->dev);
+		iommu_setup_dma_ops(gdev->dev, group->default_domain);
 	mutex_unlock(&group->mutex);
 
 	/*

@@ -2185,10 +2191,26 @@ EXPORT_SYMBOL_GPL(iommu_attach_device);
 
 int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
 {
-	if (dev->iommu && dev->iommu->attach_deferred)
-		return __iommu_attach_device(domain, dev, NULL);
+	/*
+	 * This is called on the dma mapping fast path so avoid locking. This is
+	 * racy, but we have an expectation that the driver will set up its DMAs
+	 * inside probe while being single threaded to avoid racing.
+	 */
+	if (!dev->iommu || !dev->iommu->attach_deferred)
+		return 0;
 
-	return 0;
+	guard(mutex)(&dev->iommu_group->mutex);
+
+	/*
+	 * This is a concurrent attach during a device reset. Reject it until
+	 * pci_dev_reset_iommu_done() attaches the device to group->domain.
+	 *
+	 * Note that this might fail the iommu_dma_map(). But there's nothing
+	 * more we can do here.
+	 */
+	if (dev->iommu_group->resetting_domain)
+		return -EBUSY;
+	return __iommu_attach_device(domain, dev, NULL);
 }
 
 void iommu_detach_device(struct iommu_domain *domain, struct device *dev)

@@ -2210,6 +2232,15 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(iommu_detach_device);
 
+/**
+ * iommu_get_domain_for_dev() - Return the DMA API domain pointer
+ * @dev: Device to query
+ *
+ * This function can be called within a driver bound to dev. The returned
+ * pointer is valid for the lifetime of the bound driver.
+ *
+ * It should not be called by drivers with driver_managed_dma = true.
+ */
 struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)
 {
 	/* Caller must be a probed driver on dev */

@@ -2218,10 +2249,40 @@ struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)
 	if (!group)
 		return NULL;
 
+	lockdep_assert_not_held(&group->mutex);
+
 	return group->domain;
 }
 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev);
 
+/**
+ * iommu_driver_get_domain_for_dev() - Return the driver-level domain pointer
+ * @dev: Device to query
+ *
+ * This function can be called by an iommu driver that wants to get the physical
+ * domain within an iommu callback function where group->mutex is held.
+ */
+struct iommu_domain *iommu_driver_get_domain_for_dev(struct device *dev)
+{
+	struct iommu_group *group = dev->iommu_group;
+
+	lockdep_assert_held(&group->mutex);
+
+	/*
+	 * The driver handles the low-level __iommu_attach_device(), including
+	 * the one invoked by pci_dev_reset_iommu_done() re-attaching the device
+	 * to the cached group->domain. In this case, the driver must get the
+	 * old domain from group->resetting_domain rather than group->domain.
+	 * This prevents it from re-attaching the device from group->domain
+	 * (old) to group->domain (new).
+	 */
+	if (group->resetting_domain)
+		return group->resetting_domain;
+
+	return group->domain;
+}
+EXPORT_SYMBOL_GPL(iommu_driver_get_domain_for_dev);
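A hypothetical driver-side sketch of when the new accessor matters: inside an iommu_ops callback the group mutex is already held, so a driver must use the _driver_ variant to see the reset-aware domain. All names except the two iommu_* symbols are made up for illustration:

    #include <linux/iommu.h>

    static int example_set_dev_pasid(struct iommu_domain *new_domain,
                                     struct device *dev, ioasid_t pasid,
                                     struct iommu_domain *old)
    {
        struct iommu_domain *rid_domain =
            iommu_driver_get_domain_for_dev(dev);  /* group->mutex held */

        /* decide based on the RID domain actually in effect, even mid-reset */
        if (rid_domain->type == IOMMU_DOMAIN_BLOCKED)
            return -EBUSY;
        return 0;
    }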
 /*
  * For IOMMU_DOMAIN_DMA implementations which already provide their own
  * guarantees that the group and its default domain are valid and correct.

@@ -2374,6 +2435,13 @@ static int __iommu_group_set_domain_internal(struct iommu_group *group,
 	if (WARN_ON(!new_domain))
 		return -EINVAL;
 
+	/*
+	 * This is a concurrent attach during a device reset. Reject it until
+	 * pci_dev_reset_iommu_done() attaches the device to group->domain.
+	 */
+	if (group->resetting_domain)
+		return -EBUSY;
+
 	/*
 	 * Changing the domain is done by calling attach_dev() on the new
 	 * domain. This switch does not have to be atomic and DMA can be

@@ -2562,10 +2630,12 @@ int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,
 	}
 
 	/* unroll mapping in case something went wrong */
-	if (ret)
+	if (ret) {
 		iommu_unmap(domain, orig_iova, orig_size - size);
-	else
+	} else {
 		trace_map(orig_iova, orig_paddr, orig_size);
+		iommu_debug_map(domain, orig_paddr, orig_size);
+	}
 
 	return ret;
 }

@@ -2627,6 +2697,8 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 
 	pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size);
 
+	iommu_debug_unmap_begin(domain, iova, size);
+
 	/*
 	 * Keep iterating until we either unmap 'size' bytes (or more)
 	 * or we hit an area that isn't mapped.

@@ -2647,6 +2719,7 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 	}
 
 	trace_unmap(orig_iova, size, unmapped);
+	iommu_debug_unmap_end(domain, orig_iova, size, unmapped);
 	return unmapped;
 }

@@ -3148,7 +3221,7 @@ static ssize_t iommu_group_store_type(struct iommu_group *group,
 
 	/* Make sure dma_ops is appropriately set */
 	for_each_group_device(group, gdev)
-		iommu_setup_dma_ops(gdev->dev);
+		iommu_setup_dma_ops(gdev->dev, group->default_domain);
 
 out_unlock:
 	mutex_unlock(&group->mutex);

@@ -3492,6 +3565,16 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
 		return -EINVAL;
 
 	mutex_lock(&group->mutex);
+
+	/*
+	 * This is a concurrent attach during a device reset. Reject it until
+	 * pci_dev_reset_iommu_done() attaches the device to group->domain.
+	 */
+	if (group->resetting_domain) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
 	for_each_group_device(group, device) {
 		/*
 		 * Skip PASID validation for devices without PASID support

@@ -3575,6 +3658,16 @@ int iommu_replace_device_pasid(struct iommu_domain *domain,
 		return -EINVAL;
 
 	mutex_lock(&group->mutex);
+
+	/*
+	 * This is a concurrent attach during a device reset. Reject it until
+	 * pci_dev_reset_iommu_done() attaches the device to group->domain.
+	 */
+	if (group->resetting_domain) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
 	entry = iommu_make_pasid_array_entry(domain, handle);
 	curr = xa_cmpxchg(&group->pasid_array, pasid, NULL,
 			  XA_ZERO_ENTRY, GFP_KERNEL);

@@ -3832,6 +3925,127 @@ err_unlock:
 }
 EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL");
 
+/**
+ * pci_dev_reset_iommu_prepare() - Block IOMMU to prepare for a PCI device reset
+ * @pdev: PCI device that is going to enter a reset routine
+ *
+ * The PCIe r6.0, sec 10.3.1 IMPLEMENTATION NOTE recommends disabling and
+ * blocking ATS before initiating a reset. This means that a PCIe device during
+ * the reset routine wants to block any IOMMU activity: translation and ATS
+ * invalidation.
+ *
+ * This function attaches the device's RID/PASID(s) to the
+ * group->blocking_domain, setting the group->resetting_domain. This allows the
+ * IOMMU driver to pause any IOMMU activity while leaving the group->domain
+ * pointer intact. Later, when the reset is finished,
+ * pci_dev_reset_iommu_done() can restore everything.
+ *
+ * Caller must use pci_dev_reset_iommu_prepare() with pci_dev_reset_iommu_done()
+ * before/after the core-level reset routine, to unset the resetting_domain.
+ *
+ * Return: 0 on success or negative error code if the preparation failed.
+ *
+ * These two functions are designed to be used by PCI reset functions that would
+ * not invoke any racy iommu_release_device(), since the PCI sysfs node gets
+ * removed before it notifies with a BUS_NOTIFY_REMOVED_DEVICE. When using them
+ * in other cases, callers must ensure there will be no racy
+ * iommu_release_device() call, which otherwise would UAF the dev->iommu_group
+ * pointer.
+ */
+int pci_dev_reset_iommu_prepare(struct pci_dev *pdev)
+{
+	struct iommu_group *group = pdev->dev.iommu_group;
+	unsigned long pasid;
+	void *entry;
+	int ret;
+
+	if (!pci_ats_supported(pdev) || !dev_has_iommu(&pdev->dev))
+		return 0;
+
+	guard(mutex)(&group->mutex);
+
+	/* Re-entry is not allowed */
+	if (WARN_ON(group->resetting_domain))
+		return -EBUSY;
+
+	ret = __iommu_group_alloc_blocking_domain(group);
+	if (ret)
+		return ret;
+
+	/* Stage RID domain at blocking_domain while retaining group->domain */
+	if (group->domain != group->blocking_domain) {
+		ret = __iommu_attach_device(group->blocking_domain, &pdev->dev,
+					    group->domain);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * Stage PASID domains at blocking_domain while retaining pasid_array.
+	 *
+	 * The pasid_array is mostly fenced by group->mutex, except one reader
+	 * in iommu_attach_handle_get(), so it's safe to read without xa_lock.
+	 */
+	xa_for_each_start(&group->pasid_array, pasid, entry, 1)
+		iommu_remove_dev_pasid(&pdev->dev, pasid,
+				       pasid_array_entry_to_domain(entry));
+
+	group->resetting_domain = group->blocking_domain;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_prepare);
+
+/**
+ * pci_dev_reset_iommu_done() - Restore IOMMU after a PCI device reset is done
+ * @pdev: PCI device that has finished a reset routine
+ *
+ * After a PCIe device finishes a reset routine, it wants to restore its IOMMU
+ * activity, including new translation as well as cache invalidation, by
+ * re-attaching all RID/PASIDs of the device back to the domains retained in
+ * the core-level structure.
+ *
+ * Caller must pair it with a successful pci_dev_reset_iommu_prepare().
+ *
+ * Note that, although unlikely, there is a risk that re-attaching domains
+ * might fail due to something unexpected like OOM.
+ */
+void pci_dev_reset_iommu_done(struct pci_dev *pdev)
+{
+	struct iommu_group *group = pdev->dev.iommu_group;
+	unsigned long pasid;
+	void *entry;
+
+	if (!pci_ats_supported(pdev) || !dev_has_iommu(&pdev->dev))
+		return;
+
+	guard(mutex)(&group->mutex);
+
+	/* pci_dev_reset_iommu_prepare() was bypassed for the device */
+	if (!group->resetting_domain)
+		return;
+
+	/* pci_dev_reset_iommu_prepare() was not successfully called */
+	if (WARN_ON(!group->blocking_domain))
+		return;
+
+	/* Re-attach RID domain back to group->domain */
+	if (group->domain != group->blocking_domain) {
+		WARN_ON(__iommu_attach_device(group->domain, &pdev->dev,
+					      group->blocking_domain));
+	}
+
+	/*
+	 * Re-attach PASID domains back to the domains retained in pasid_array.
+	 *
+	 * The pasid_array is mostly fenced by group->mutex, except one reader
+	 * in iommu_attach_handle_get(), so it's safe to read without xa_lock.
+	 */
+	xa_for_each_start(&group->pasid_array, pasid, entry, 1)
+		WARN_ON(__iommu_set_group_pasid(
+			pasid_array_entry_to_domain(entry), group, pasid,
+			group->blocking_domain));
+
+	group->resetting_domain = NULL;
+}
+EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_done);
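The intended call pattern is visible in the pcie_flr() hunk further down: quiesce the IOMMU, perform the reset, then restore, even when the reset itself fails. A distilled sketch, where do_device_reset() is a placeholder for the actual reset step:

    static int example_reset_with_iommu(struct pci_dev *pdev)
    {
        int ret = pci_dev_reset_iommu_prepare(pdev);

        if (ret)
            return ret;             /* could not quiesce; do not reset */

        ret = do_device_reset(pdev);    /* placeholder for the reset */

        pci_dev_reset_iommu_done(pdev); /* always restore the domains */
        return ret;
    }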
 #if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)
 /**
  * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain

@@ -9,6 +9,7 @@
 
 #include <linux/delay.h>
 #include <linux/init.h>
+#include <linux/iommu.h>
 #include <linux/irqdomain.h>
 #include <linux/pci.h>
 #include <linux/msi.h>

@@ -971,6 +972,7 @@ void pci_set_acpi_fwnode(struct pci_dev *dev)
 int pci_dev_acpi_reset(struct pci_dev *dev, bool probe)
 {
 	acpi_handle handle = ACPI_HANDLE(&dev->dev);
+	int ret;
 
 	if (!handle || !acpi_has_method(handle, "_RST"))
 		return -ENOTTY;

@@ -978,12 +980,19 @@ int pci_dev_acpi_reset(struct pci_dev *dev, bool probe)
 	if (probe)
 		return 0;
 
-	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_RST", NULL, NULL))) {
-		pci_warn(dev, "ACPI _RST failed\n");
-		return -ENOTTY;
+	ret = pci_dev_reset_iommu_prepare(dev);
+	if (ret) {
+		pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", ret);
+		return ret;
 	}
 
-	return 0;
+	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_RST", NULL, NULL))) {
+		pci_warn(dev, "ACPI _RST failed\n");
+		ret = -ENOTTY;
+	}
+
+	pci_dev_reset_iommu_done(dev);
+	return ret;
 }
 
 bool acpi_pci_power_manageable(struct pci_dev *dev)

@@ -13,6 +13,7 @@
 #include <linux/delay.h>
 #include <linux/dmi.h>
 #include <linux/init.h>
+#include <linux/iommu.h>
 #include <linux/msi.h>
 #include <linux/of.h>
 #include <linux/pci.h>

@@ -25,6 +26,7 @@
 #include <linux/logic_pio.h>
 #include <linux/device.h>
 #include <linux/pm_runtime.h>
+#include <linux/pci-ats.h>
 #include <linux/pci_hotplug.h>
 #include <linux/vmalloc.h>
 #include <asm/dma.h>

@@ -4330,13 +4332,22 @@ EXPORT_SYMBOL(pci_wait_for_pending_transaction);
  */
 int pcie_flr(struct pci_dev *dev)
 {
+	int ret;
+
 	if (!pci_wait_for_pending_transaction(dev))
 		pci_err(dev, "timed out waiting for pending transaction; performing function level reset anyway\n");
 
+	/* Have to call it after waiting for pending DMA transaction */
+	ret = pci_dev_reset_iommu_prepare(dev);
+	if (ret) {
+		pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", ret);
+		return ret;
+	}
+
 	pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR);
 
 	if (dev->imm_ready)
-		return 0;
+		goto done;
 
 	/*
	 * Per PCIe r4.0, sec 6.6.2, a device must complete an FLR within

@@ -4345,7 +4356,10 @@ int pcie_flr(struct pci_dev *dev)
 	 */
 	msleep(100);
 
-	return pci_dev_wait(dev, "FLR", PCIE_RESET_READY_POLL_MS);
+	ret = pci_dev_wait(dev, "FLR", PCIE_RESET_READY_POLL_MS);
+done:
+	pci_dev_reset_iommu_done(dev);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(pcie_flr);

@@ -4373,6 +4387,7 @@ EXPORT_SYMBOL_GPL(pcie_reset_flr);
 
 static int pci_af_flr(struct pci_dev *dev, bool probe)
 {
+	int ret;
 	int pos;
 	u8 cap;
 

@@ -4399,10 +4414,17 @@ static int pci_af_flr(struct pci_dev *dev, bool probe)
 				 PCI_AF_STATUS_TP << 8))
 		pci_err(dev, "timed out waiting for pending transaction; performing AF function level reset anyway\n");
 
+	/* Have to call it after waiting for pending DMA transaction */
+	ret = pci_dev_reset_iommu_prepare(dev);
+	if (ret) {
+		pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", ret);
+		return ret;
+	}
+
 	pci_write_config_byte(dev, pos + PCI_AF_CTRL, PCI_AF_CTRL_FLR);
 
 	if (dev->imm_ready)
-		return 0;
+		goto done;
 
 	/*
	 * Per Advanced Capabilities for Conventional PCI ECN, 13 April 2006,

@@ -4412,7 +4434,10 @@ static int pci_af_flr(struct pci_dev *dev, bool probe)
 	 */
 	msleep(100);
 
-	return pci_dev_wait(dev, "AF_FLR", PCIE_RESET_READY_POLL_MS);
+	ret = pci_dev_wait(dev, "AF_FLR", PCIE_RESET_READY_POLL_MS);
+done:
+	pci_dev_reset_iommu_done(dev);
+	return ret;
 }
 
 /**

@@ -4433,6 +4458,7 @@ static int pci_af_flr(struct pci_dev *dev, bool probe)
 static int pci_pm_reset(struct pci_dev *dev, bool probe)
 {
 	u16 csr;
+	int ret;
 
 	if (!dev->pm_cap || dev->dev_flags & PCI_DEV_FLAGS_NO_PM_RESET)
 		return -ENOTTY;

@@ -4447,6 +4473,12 @@ static int pci_pm_reset(struct pci_dev *dev, bool probe)
 	if (dev->current_state != PCI_D0)
 		return -EINVAL;
 
+	ret = pci_dev_reset_iommu_prepare(dev);
+	if (ret) {
+		pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", ret);
+		return ret;
+	}
+
 	csr &= ~PCI_PM_CTRL_STATE_MASK;
 	csr |= PCI_D3hot;
 	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);

@@ -4457,7 +4489,9 @@ static int pci_pm_reset(struct pci_dev *dev, bool probe)
 	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
 	pci_dev_d3_sleep(dev);
 
-	return pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS);
+	ret = pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS);
+	pci_dev_reset_iommu_done(dev);
+	return ret;
 }
 
 /**

@@ -4885,10 +4919,20 @@ static int pci_reset_bus_function(struct pci_dev *dev, bool probe)
 		return -ENOTTY;
 	}
 
+	rc = pci_dev_reset_iommu_prepare(dev);
+	if (rc) {
+		pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", rc);
+		return rc;
+	}
+
 	rc = pci_dev_reset_slot_function(dev, probe);
 	if (rc != -ENOTTY)
-		return rc;
-	return pci_parent_bus_reset(dev, probe);
+		goto done;
+
+	rc = pci_parent_bus_reset(dev, probe);
+done:
+	pci_dev_reset_iommu_done(dev);
+	return rc;
 }
 
 static int cxl_reset_bus_function(struct pci_dev *dev, bool probe)

@@ -4912,6 +4956,12 @@ static int cxl_reset_bus_function(struct pci_dev *dev, bool probe)
 	if (rc)
 		return -ENOTTY;
 
+	rc = pci_dev_reset_iommu_prepare(dev);
+	if (rc) {
+		pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", rc);
+		return rc;
+	}
+
 	if (reg & PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR) {
 		val = reg;
 	} else {

@@ -4926,6 +4976,7 @@ static int cxl_reset_bus_function(struct pci_dev *dev, bool probe)
 	pci_write_config_word(bridge, dvsec + PCI_DVSEC_CXL_PORT_CTL,
 			      reg);
 
+	pci_dev_reset_iommu_done(dev);
 	return rc;
 }
@ -21,6 +21,7 @@
|
|||
#include <linux/pci.h>
|
||||
#include <linux/isa-dma.h> /* isa_dma_bridge_buggy */
|
||||
#include <linux/init.h>
|
||||
#include <linux/iommu.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/dmi.h>
|
||||
|
|
@ -4228,6 +4229,22 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
|
|||
{ 0 }
|
||||
};
|
||||
|
||||
static int __pci_dev_specific_reset(struct pci_dev *dev, bool probe,
|
||||
const struct pci_dev_reset_methods *i)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = pci_dev_reset_iommu_prepare(dev);
|
||||
if (ret) {
|
||||
pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = i->reset(dev, probe);
|
||||
pci_dev_reset_iommu_done(dev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* These device-specific reset methods are here rather than in a driver
|
||||
* because when a host assigns a device to a guest VM, the host may need
|
||||
|
|
@ -4242,7 +4259,7 @@ int pci_dev_specific_reset(struct pci_dev *dev, bool probe)
|
|||
i->vendor == (u16)PCI_ANY_ID) &&
|
||||
(i->device == dev->device ||
|
||||
i->device == (u16)PCI_ANY_ID))
|
||||
return i->reset(dev, probe);
|
||||
return __pci_dev_specific_reset(dev, probe, i);
|
||||
}
|
||||
|
||||
return -ENOTTY;
|
||||
|
|
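Every reset path touched above follows the same shape: quiesce the IOMMU once pending DMA has drained, perform the reset, then unconditionally re-enable the IOMMU. As a minimal sketch of that pairing (example_reset() and do_device_reset() are hypothetical names for illustration, not part of the patch):

	/* Hypothetical reset method illustrating the prepare/done pairing. */
	#include <linux/iommu.h>
	#include <linux/pci.h>

	static int example_reset(struct pci_dev *dev, bool probe)
	{
		int ret;

		if (probe)
			return 0;

		/* Stop the IOMMU only after pending DMA has been drained. */
		ret = pci_dev_reset_iommu_prepare(dev);
		if (ret) {
			pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", ret);
			return ret;
		}

		ret = do_device_reset(dev);	/* hypothetical reset backend */

		/* Re-enable the IOMMU whether or not the reset succeeded. */
		pci_dev_reset_iommu_done(dev);
		return ret;
	}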

32	include/linux/iommu-debug-pagealloc.h	Normal file
@ -0,0 +1,32 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2025 - Google Inc
 * Author: Mostafa Saleh <smostafa@google.com>
 * IOMMU API debug page alloc sanitizer
 */

#ifndef __LINUX_IOMMU_DEBUG_PAGEALLOC_H
#define __LINUX_IOMMU_DEBUG_PAGEALLOC_H

#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
DECLARE_STATIC_KEY_FALSE(iommu_debug_initialized);

extern struct page_ext_operations page_iommu_debug_ops;

void __iommu_debug_check_unmapped(const struct page *page, int numpages);

static inline void iommu_debug_check_unmapped(const struct page *page, int numpages)
{
	if (static_branch_unlikely(&iommu_debug_initialized))
		__iommu_debug_check_unmapped(page, numpages);
}

#else
static inline void iommu_debug_check_unmapped(const struct page *page,
					      int numpages)
{
}

#endif /* CONFIG_IOMMU_DEBUG_PAGEALLOC */

#endif /* __LINUX_IOMMU_DEBUG_PAGEALLOC_H */
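The inline wrapper is gated on a static key, so a kernel built with the sanitizer but booted without iommu.debug_pagealloc=1 pays only a patched-out branch. The implementation file is not part of this excerpt; one plausible sketch of how the key could be flipped at boot, assuming an early_param handler (all names except iommu_debug_initialized are illustrative):

	/* Illustrative only -- drivers/iommu/iommu-debug-pagealloc.c is not shown here. */
	#include <linux/init.h>
	#include <linux/jump_label.h>
	#include <linux/kstrtox.h>

	DEFINE_STATIC_KEY_FALSE(iommu_debug_initialized);

	static bool iommu_debug_enabled __initdata;

	static int __init iommu_debug_pagealloc_setup(char *str)
	{
		return kstrtobool(str, &iommu_debug_enabled);
	}
	early_param("iommu.debug_pagealloc", iommu_debug_pagealloc_setup);

	static int __init iommu_debug_init(void)
	{
		if (iommu_debug_enabled)
			static_branch_enable(&iommu_debug_initialized);
		return 0;
	}
	early_initcall(iommu_debug_init);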

@ -910,6 +910,7 @@ extern int iommu_attach_device(struct iommu_domain *domain,
extern void iommu_detach_device(struct iommu_domain *domain,
				struct device *dev);
extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
struct iommu_domain *iommu_driver_get_domain_for_dev(struct device *dev);
extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
		     phys_addr_t paddr, size_t size, int prot, gfp_t gfp);

@ -1187,6 +1188,10 @@ void iommu_detach_device_pasid(struct iommu_domain *domain,
			       struct device *dev, ioasid_t pasid);
ioasid_t iommu_alloc_global_pasid(struct device *dev);
void iommu_free_global_pasid(ioasid_t pasid);

/* PCI device reset functions */
int pci_dev_reset_iommu_prepare(struct pci_dev *pdev);
void pci_dev_reset_iommu_done(struct pci_dev *pdev);
#else /* CONFIG_IOMMU_API */

struct iommu_ops {};

@ -1510,6 +1515,15 @@ static inline ioasid_t iommu_alloc_global_pasid(struct device *dev)
}

static inline void iommu_free_global_pasid(ioasid_t pasid) {}

static inline int pci_dev_reset_iommu_prepare(struct pci_dev *pdev)
{
	return 0;
}

static inline void pci_dev_reset_iommu_done(struct pci_dev *pdev)
{
}
#endif /* CONFIG_IOMMU_API */

#ifdef CONFIG_IRQ_MSI_IOMMU

@ -36,6 +36,7 @@
#include <linux/rcuwait.h>
#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/iommu-debug-pagealloc.h>

struct mempolicy;
struct anon_vma;

@ -4137,12 +4138,16 @@ extern void __kernel_map_pages(struct page *page, int numpages, int enable);
#ifdef CONFIG_DEBUG_PAGEALLOC
static inline void debug_pagealloc_map_pages(struct page *page, int numpages)
{
	iommu_debug_check_unmapped(page, numpages);

	if (debug_pagealloc_enabled_static())
		__kernel_map_pages(page, numpages, 1);
}

static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages)
{
	iommu_debug_check_unmapped(page, numpages);

	if (debug_pagealloc_enabled_static())
		__kernel_map_pages(page, numpages, 0);
}
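With the check wired into both debug_pagealloc helpers, every page transiting the allocator's debug path is screened against live IOMMU mappings. The class of bug this is designed to flag, as a hypothetical sequence (illustrative driver code, not from this series):

	/* Hypothetical DMA use-after-free that the sanitizer should flag. */
	#include <linux/gfp.h>
	#include <linux/iommu.h>
	#include <linux/mm.h>

	static void example_leak_mapping(struct iommu_domain *domain, unsigned long iova)
	{
		struct page *page = alloc_page(GFP_KERNEL);

		if (!page)
			return;

		if (iommu_map(domain, iova, page_to_phys(page), PAGE_SIZE,
			      IOMMU_READ | IOMMU_WRITE, GFP_KERNEL))
			goto out_free;

		/*
		 * BUG: on the success path the page is freed while still
		 * mapped in the domain. The free path reaches
		 * debug_pagealloc_unmap_pages(), and iommu_debug_check_unmapped()
		 * can warn that a device may still DMA into memory the
		 * allocator is about to reuse.
		 */
	out_free:
		__free_page(page);
	}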

@ -93,6 +93,7 @@ static inline bool page_ext_iter_next_fast_possible(unsigned long next_pfn)
#endif

extern struct page_ext *page_ext_get(const struct page *page);
extern struct page_ext *page_ext_from_phys(phys_addr_t phys);
extern void page_ext_put(struct page_ext *page_ext);
extern struct page_ext *page_ext_lookup(unsigned long pfn);

@ -215,6 +216,11 @@ static inline struct page_ext *page_ext_get(const struct page *page)
	return NULL;
}

static inline struct page_ext *page_ext_from_phys(phys_addr_t phys)
{
	return NULL;
}

static inline void page_ext_put(struct page_ext *page_ext)
{
}

@ -465,16 +465,27 @@ struct iommu_hwpt_arm_smmuv3 {
	__aligned_le64 ste[2];
};

/**
 * struct iommu_hwpt_amd_guest - AMD IOMMU guest I/O page table data
 *                               (IOMMU_HWPT_DATA_AMD_GUEST)
 * @dte: Guest Device Table Entry (DTE)
 */
struct iommu_hwpt_amd_guest {
	__aligned_u64 dte[4];
};

/**
 * enum iommu_hwpt_data_type - IOMMU HWPT Data Type
 * @IOMMU_HWPT_DATA_NONE: no data
 * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table
 * @IOMMU_HWPT_DATA_ARM_SMMUV3: ARM SMMUv3 Context Descriptor Table
 * @IOMMU_HWPT_DATA_AMD_GUEST: AMD IOMMU guest page table
 */
enum iommu_hwpt_data_type {
	IOMMU_HWPT_DATA_NONE = 0,
	IOMMU_HWPT_DATA_VTD_S1 = 1,
	IOMMU_HWPT_DATA_ARM_SMMUV3 = 2,
	IOMMU_HWPT_DATA_AMD_GUEST = 3,
};

/**

@ -623,6 +634,32 @@ struct iommu_hw_info_tegra241_cmdqv {
	__u8 __reserved;
};

/**
 * struct iommu_hw_info_amd - AMD IOMMU device info
 *
 * @efr : Value of AMD IOMMU Extended Feature Register (EFR)
 * @efr2: Value of AMD IOMMU Extended Feature 2 Register (EFR2)
 *
 * Please see the description of these registers in the following sections of
 * the AMD I/O Virtualization Technology (IOMMU) Specification
 * (https://docs.amd.com/v/u/en-US/48882_3.10_PUB):
 *
 * - MMIO Offset 0030h IOMMU Extended Feature Register
 * - MMIO Offset 01A0h IOMMU Extended Feature 2 Register
 *
 * Note: The EFR and EFR2 are raw values reported by hardware.
 * The VMM is responsible for determining the appropriate flags to expose to
 * the VM, since certain features are not currently supported by the kernel
 * for HW-vIOMMU.
 *
 * Current VMM-allowed list of feature flags:
 * - EFR[GTSup, GASup, GioSup, PPRSup, EPHSup, GATS, GLX, PASmax]
 */
struct iommu_hw_info_amd {
	__aligned_u64 efr;
	__aligned_u64 efr2;
};

/**
 * enum iommu_hw_info_type - IOMMU Hardware Info Types
 * @IOMMU_HW_INFO_TYPE_NONE: Output by the drivers that do not report hardware

@ -632,6 +669,7 @@ struct iommu_hw_info_tegra241_cmdqv {
 * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type
 * @IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM
 *                                     SMMUv3) info type
 * @IOMMU_HW_INFO_TYPE_AMD: AMD IOMMU info type
 */
enum iommu_hw_info_type {
	IOMMU_HW_INFO_TYPE_NONE = 0,

@ -639,6 +677,7 @@ enum iommu_hw_info_type {
	IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
	IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2,
	IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV = 3,
	IOMMU_HW_INFO_TYPE_AMD = 4,
};

/**
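From userspace, a VMM would discover these raw registers through the IOMMU_GET_HW_INFO ioctl and match on the new type value. A hedged sketch (error handling elided; field names follow the iommufd uAPI as I understand it, so treat the details as illustrative rather than authoritative):

	/* Userspace sketch: query raw AMD EFR/EFR2 via iommufd. */
	#include <linux/iommufd.h>
	#include <stdint.h>
	#include <sys/ioctl.h>

	static int query_amd_efr(int iommufd, uint32_t dev_id,
				 struct iommu_hw_info_amd *amd)
	{
		struct iommu_hw_info cmd = {
			.size = sizeof(cmd),
			.dev_id = dev_id,
			.data_len = sizeof(*amd),
			.data_uptr = (uintptr_t)amd,
		};

		if (ioctl(iommufd, IOMMU_GET_HW_INFO, &cmd))
			return -1;
		if (cmd.out_data_type != IOMMU_HW_INFO_TYPE_AMD)
			return -1;	/* not an AMD IOMMU */

		/* amd->efr / amd->efr2 now hold the raw register values. */
		return 0;
	}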

@ -964,6 +964,10 @@ struct vfio_device_bind_iommufd {
 * hwpt corresponding to the given pt_id.
 *
 * Return: 0 on success, -errno on failure.
 *
 * When a device is resetting, -EBUSY will be returned to reject any concurrent
 * attachment to the resetting device itself or to any sibling device in the
 * same IOMMU group as the resetting device.
 */
struct vfio_device_attach_iommufd_pt {
	__u32 argsz;
@ -11,6 +11,7 @@
#include <linux/page_table_check.h>
#include <linux/rcupdate.h>
#include <linux/pgalloc_tag.h>
#include <linux/iommu-debug-pagealloc.h>

/*
 * struct page extension

@ -89,6 +90,9 @@ static struct page_ext_operations *page_ext_ops[] __initdata = {
#ifdef CONFIG_PAGE_TABLE_CHECK
	&page_table_check_ops,
#endif
#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
	&page_iommu_debug_ops,
#endif
};

unsigned long page_ext_size;

@ -534,6 +538,29 @@ struct page_ext *page_ext_get(const struct page *page)
	return page_ext;
}

/**
 * page_ext_from_phys() - Get the page_ext structure for a physical address.
 * @phys: The physical address to query.
 *
 * This function safely gets the `struct page_ext` associated with a given
 * physical address. It performs validation to ensure the address corresponds
 * to a valid, online struct page before attempting to access it.
 * It returns NULL for MMIO, ZONE_DEVICE, holes and offline memory.
 *
 * Return: NULL if no page_ext exists for this physical address.
 * Context: Any context. Caller may not sleep until they have called
 *          page_ext_put().
 */
struct page_ext *page_ext_from_phys(phys_addr_t phys)
{
	struct page *page = pfn_to_online_page(__phys_to_pfn(phys));

	if (!page)
		return NULL;

	return page_ext_get(page);
}

/**
 * page_ext_put() - Working with page extended information is done.
 * @page_ext: Page extended information received from page_ext_get().
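page_ext_from_phys() is the hook the sanitizer needs because iommu_map()/iommu_unmap() see physical addresses rather than struct page pointers. A sketch of the intended consumption pattern (the tracking function and its per-page state are illustrative; the sanitizer's own source file is not shown in this diff):

	/* Illustrative consumer: resolve a mapped physical address to page_ext. */
	#include <linux/page_ext.h>

	static void iommu_debug_track_phys(phys_addr_t phys)
	{
		struct page_ext *page_ext = page_ext_from_phys(phys);

		if (!page_ext)
			return;	/* MMIO, ZONE_DEVICE, hole or offline memory */

		/*
		 * A real implementation would adjust a per-page mapping
		 * counter stored behind page_iommu_debug_ops here.
		 */

		page_ext_put(page_ext);
	}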

@ -56,9 +56,10 @@
#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/firmware.h>
#include <linux/interrupt.h>
#include <linux/fs.h>
#include <linux/i2c.h>
#include <linux/interrupt.h>
#include <linux/io-pgtable.h>
#include <linux/ioport.h>
#include <linux/jiffies.h>
#include <linux/jump_label.h>

5	rust/kernel/iommu/mod.rs	Normal file
@ -0,0 +1,5 @@
// SPDX-License-Identifier: GPL-2.0

//! Rust support related to IOMMU.

pub mod pgtable;

279	rust/kernel/iommu/pgtable.rs	Normal file
@ -0,0 +1,279 @@
// SPDX-License-Identifier: GPL-2.0

//! IOMMU page table management.
//!
//! C header: [`include/linux/io-pgtable.h`](srctree/include/linux/io-pgtable.h)

use core::{
    marker::PhantomData,
    ptr::NonNull, //
};

use crate::{
    alloc,
    bindings,
    device::{
        Bound,
        Device, //
    },
    devres::Devres,
    error::to_result,
    io::PhysAddr,
    prelude::*, //
};

use bindings::io_pgtable_fmt;

/// Protection flags used with IOMMU mappings.
pub mod prot {
    /// Read access.
    pub const READ: u32 = bindings::IOMMU_READ;
    /// Write access.
    pub const WRITE: u32 = bindings::IOMMU_WRITE;
    /// Request cache coherency.
    pub const CACHE: u32 = bindings::IOMMU_CACHE;
    /// Request no-execute permission.
    pub const NOEXEC: u32 = bindings::IOMMU_NOEXEC;
    /// MMIO peripheral mapping.
    pub const MMIO: u32 = bindings::IOMMU_MMIO;
    /// Privileged mapping.
    pub const PRIVILEGED: u32 = bindings::IOMMU_PRIV;
}

/// Represents a requested `io_pgtable` configuration.
pub struct Config {
    /// Quirk bitmask (type-specific).
    pub quirks: usize,
    /// Valid page sizes, as a bitmask of powers of two.
    pub pgsize_bitmap: usize,
    /// Input address space size in bits.
    pub ias: u32,
    /// Output address space size in bits.
    pub oas: u32,
    /// IOMMU uses coherent accesses for page table walks.
    pub coherent_walk: bool,
}

/// An io page table using a specific format.
///
/// # Invariants
///
/// The pointer references a valid io page table.
pub struct IoPageTable<F: IoPageTableFmt> {
    ptr: NonNull<bindings::io_pgtable_ops>,
    _marker: PhantomData<F>,
}

// SAFETY: `struct io_pgtable_ops` is not restricted to a single thread.
unsafe impl<F: IoPageTableFmt> Send for IoPageTable<F> {}
// SAFETY: `struct io_pgtable_ops` may be accessed concurrently.
unsafe impl<F: IoPageTableFmt> Sync for IoPageTable<F> {}

/// The format used by this page table.
pub trait IoPageTableFmt: 'static {
    /// The value representing this format.
    const FORMAT: io_pgtable_fmt;
}

impl<F: IoPageTableFmt> IoPageTable<F> {
    /// Create a new `IoPageTable` as a device resource.
    #[inline]
    pub fn new(
        dev: &Device<Bound>,
        config: Config,
    ) -> impl PinInit<Devres<IoPageTable<F>>, Error> + '_ {
        // SAFETY: Devres ensures that the value is dropped during device unbind.
        Devres::new(dev, unsafe { Self::new_raw(dev, config) })
    }

    /// Create a new `IoPageTable`.
    ///
    /// # Safety
    ///
    /// If successful, then the returned `IoPageTable` must be dropped before the device is
    /// unbound.
    #[inline]
    pub unsafe fn new_raw(dev: &Device<Bound>, config: Config) -> Result<IoPageTable<F>> {
        let mut raw_cfg = bindings::io_pgtable_cfg {
            quirks: config.quirks,
            pgsize_bitmap: config.pgsize_bitmap,
            ias: config.ias,
            oas: config.oas,
            coherent_walk: config.coherent_walk,
            tlb: &raw const NOOP_FLUSH_OPS,
            iommu_dev: dev.as_raw(),
            // SAFETY: All zeroes is a valid value for `struct io_pgtable_cfg`.
            ..unsafe { core::mem::zeroed() }
        };

        // SAFETY:
        // * The raw_cfg pointer is valid for the duration of this call.
        // * The provided `NOOP_FLUSH_OPS` contains valid function pointers that accept a null
        //   pointer as cookie.
        // * The caller ensures that the io pgtable does not outlive the device.
        let ops = unsafe {
            bindings::alloc_io_pgtable_ops(F::FORMAT, &mut raw_cfg, core::ptr::null_mut())
        };

        // INVARIANT: We successfully created a valid page table.
        Ok(IoPageTable {
            ptr: NonNull::new(ops).ok_or(ENOMEM)?,
            _marker: PhantomData,
        })
    }

    /// Obtain a raw pointer to the underlying `struct io_pgtable_ops`.
    #[inline]
    pub fn raw_ops(&self) -> *mut bindings::io_pgtable_ops {
        self.ptr.as_ptr()
    }

    /// Obtain a raw pointer to the underlying `struct io_pgtable`.
    #[inline]
    pub fn raw_pgtable(&self) -> *mut bindings::io_pgtable {
        // SAFETY: The io_pgtable_ops of an io-pgtable is always the `ops` field of an
        // `io_pgtable`.
        unsafe { kernel::container_of!(self.raw_ops(), bindings::io_pgtable, ops) }
    }

    /// Obtain a raw pointer to the underlying `struct io_pgtable_cfg`.
    #[inline]
    pub fn raw_cfg(&self) -> *mut bindings::io_pgtable_cfg {
        // SAFETY: The `raw_pgtable()` method returns a valid pointer.
        unsafe { &raw mut (*self.raw_pgtable()).cfg }
    }

    /// Map a physically contiguous range of pages of the same size.
    ///
    /// Even if successful, this operation may not map the entire range. In that case, only a
    /// prefix of the range is mapped, and the returned integer indicates its length in bytes.
    /// The caller will usually call `map_pages` again for the remaining range.
    ///
    /// The returned [`Result`] indicates whether an error was encountered while mapping pages.
    /// Note that this may return a non-zero length even if an error was encountered. The caller
    /// will usually [unmap the relevant pages](Self::unmap_pages) on error.
    ///
    /// The caller must flush the TLB before using the pgtable to access the newly created mapping.
    ///
    /// # Safety
    ///
    /// * No other io-pgtable operation may access the range `iova .. iova+pgsize*pgcount` while
    ///   this `map_pages` operation executes.
    /// * This page table must not contain any mapping that overlaps with the mapping created by
    ///   this call.
    /// * If this page table is live, then the caller must ensure that it's okay to access the
    ///   physical address being mapped for the duration in which it is mapped.
    #[inline]
    pub unsafe fn map_pages(
        &self,
        iova: usize,
        paddr: PhysAddr,
        pgsize: usize,
        pgcount: usize,
        prot: u32,
        flags: alloc::Flags,
    ) -> (usize, Result) {
        let mut mapped: usize = 0;

        // SAFETY: The `map_pages` function in `io_pgtable_ops` is never null.
        let map_pages = unsafe { (*self.raw_ops()).map_pages.unwrap_unchecked() };

        // SAFETY: The safety requirements of this method are sufficient to call `map_pages`.
        let ret = to_result(unsafe {
            (map_pages)(
                self.raw_ops(),
                iova,
                paddr,
                pgsize,
                pgcount,
                prot as i32,
                flags.as_raw(),
                &mut mapped,
            )
        });

        (mapped, ret)
    }

    /// Unmap a range of virtually contiguous pages of the same size.
    ///
    /// This may not unmap the entire range, and returns the length of the unmapped prefix in
    /// bytes.
    ///
    /// # Safety
    ///
    /// * No other io-pgtable operation may access the range `iova .. iova+pgsize*pgcount` while
    ///   this `unmap_pages` operation executes.
    /// * This page table must contain one or more consecutive mappings starting at `iova` whose
    ///   total size is `pgcount * pgsize`.
    #[inline]
    #[must_use]
    pub unsafe fn unmap_pages(&self, iova: usize, pgsize: usize, pgcount: usize) -> usize {
        // SAFETY: The `unmap_pages` function in `io_pgtable_ops` is never null.
        let unmap_pages = unsafe { (*self.raw_ops()).unmap_pages.unwrap_unchecked() };

        // SAFETY: The safety requirements of this method are sufficient to call `unmap_pages`.
        unsafe { (unmap_pages)(self.raw_ops(), iova, pgsize, pgcount, core::ptr::null_mut()) }
    }
}

// For the initial users of these rust bindings, the GPU FW is managing the IOTLB and performs all
// required invalidations using a range. There is no need for it to get ARM style invalidation
// instructions from the page table code.
//
// Support for flushing the TLB with ARM style invalidation instructions may be added in the
// future.
static NOOP_FLUSH_OPS: bindings::iommu_flush_ops = bindings::iommu_flush_ops {
    tlb_flush_all: Some(rust_tlb_flush_all_noop),
    tlb_flush_walk: Some(rust_tlb_flush_walk_noop),
    tlb_add_page: None,
};

#[no_mangle]
extern "C" fn rust_tlb_flush_all_noop(_cookie: *mut core::ffi::c_void) {}

#[no_mangle]
extern "C" fn rust_tlb_flush_walk_noop(
    _iova: usize,
    _size: usize,
    _granule: usize,
    _cookie: *mut core::ffi::c_void,
) {
}

impl<F: IoPageTableFmt> Drop for IoPageTable<F> {
    fn drop(&mut self) {
        // SAFETY: The caller of `Self::ttbr()` promised that the page table is not live when this
        // destructor runs.
        unsafe { bindings::free_io_pgtable_ops(self.raw_ops()) };
    }
}

/// The `ARM_64_LPAE_S1` page table format.
pub enum ARM64LPAES1 {}

impl IoPageTableFmt for ARM64LPAES1 {
    const FORMAT: io_pgtable_fmt = bindings::io_pgtable_fmt_ARM_64_LPAE_S1 as io_pgtable_fmt;
}

impl IoPageTable<ARM64LPAES1> {
    /// Access the `ttbr` field of the configuration.
    ///
    /// This is the physical address of the page table, which may be passed to the device that
    /// needs to use it.
    ///
    /// # Safety
    ///
    /// The caller must ensure that the device stops using the page table before dropping it.
    #[inline]
    pub unsafe fn ttbr(&self) -> u64 {
        // SAFETY: `arm_lpae_s1_cfg` is the right cfg type for `ARM64LPAES1`.
        unsafe { (*self.raw_cfg()).__bindgen_anon_1.arm_lpae_s1_cfg.ttbr }
    }

    /// Access the `mair` field of the configuration.
    #[inline]
    pub fn mair(&self) -> u64 {
        // SAFETY: `arm_lpae_s1_cfg` is the right cfg type for `ARM64LPAES1`.
        unsafe { (*self.raw_cfg()).__bindgen_anon_1.arm_lpae_s1_cfg.mair }
    }
}
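For readers more familiar with the C side, these bindings wrap the existing io-pgtable API; a rough C equivalent of `IoPageTable::<ARM64LPAES1>::new()` with the same no-op TLB ops looks like the sketch below (page sizes and address widths are placeholders, and all `example_*` names are illustrative):

	/* C-side sketch of what IoPageTable::<ARM64LPAES1>::new() wraps. */
	#include <linux/io-pgtable.h>
	#include <linux/sizes.h>

	static void example_tlb_flush_all(void *cookie) {}
	static void example_tlb_flush_walk(unsigned long iova, size_t size,
					   size_t granule, void *cookie) {}

	static const struct iommu_flush_ops example_noop_flush_ops = {
		.tlb_flush_all	= example_tlb_flush_all,
		.tlb_flush_walk	= example_tlb_flush_walk,
	};

	static struct io_pgtable_ops *example_alloc_pgtable(struct device *dev)
	{
		struct io_pgtable_cfg cfg = {
			.pgsize_bitmap	= SZ_4K | SZ_2M,	/* placeholder */
			.ias		= 48,			/* placeholder */
			.oas		= 48,			/* placeholder */
			.coherent_walk	= true,
			.tlb		= &example_noop_flush_ops,
			.iommu_dev	= dev,
		};

		/* NULL cookie, matching the Rust code above. */
		return alloc_io_pgtable_ops(ARM_64_LPAE_S1, &cfg, NULL);
	}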

@ -103,6 +103,7 @@ pub mod id_pool;
pub mod init;
pub mod io;
pub mod ioctl;
pub mod iommu;
pub mod iov;
pub mod irq;
pub mod jump_label;