linux/drivers/dax/hmem/device.c
Dan Williams bc62f5b308 dax/hmem, e820, resource: Defer Soft Reserved insertion until hmem is ready
Insert Soft Reserved memory into a dedicated soft_reserve_resource tree
instead of the iomem_resource tree at boot. Delay publishing these ranges
into the iomem hierarchy until ownership is resolved and the HMEM path
is ready to consume them.

Publishing Soft Reserved ranges into iomem too early conflicts with CXL
hotplug and prevents region assembly when those ranges overlap CXL
windows.

Follow up patches will reinsert Soft Reserved ranges into iomem after CXL
window publication is complete and HMEM is ready to claim the memory. This
provides a cleaner handoff between EFI-defined memory ranges and CXL
resource management without trimming or deleting resources later.

In the meantime "Soft Reserved" resources will no longer appear in
/proc/iomem; only the resources derived from them remain visible. For
example, with "memmap=4G%4G+0xefffffff"

Before:
100000000-1ffffffff : Soft Reserved
  100000000-1ffffffff : dax1.0
    100000000-1ffffffff : System RAM (kmem)

After:
100000000-1ffffffff : dax1.0
  100000000-1ffffffff : System RAM (kmem)

The expectation is that this does not lead to a user visible regression
because the dax1.0 device is created in both instances.

Co-developed-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
[Smita: incorporate feedback from x86 maintainer review]
Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Link: https://patch.msgid.link/20251120031925.87762-2-Smita.KoralahalliChannabasappa@amd.com
[djbw: cleanups and clarifications]
Link: https://lore.kernel.org/69443f707b025_1cee10022@dwillia2-mobl4.notmuch
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2026-01-16 09:02:36 -07:00

94 lines
2 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include <linux/platform_device.h>
#include <linux/memregion.h>
#include <linux/module.h>
#include <linux/dax.h>
#include <linux/mm.h>
/* "disable" module parameter: suppress all hmem range registration. */
static bool nohmem;
module_param_named(disable, nohmem, bool, 0444);

/* Set once the singleton "hmem_platform" device has been added. */
static bool platform_initialized;

/* Serializes registration into, and walks of, the hmem_active tree. */
static DEFINE_MUTEX(hmem_resource_lock);

/*
 * Private resource tree (separate from iomem_resource) accumulating the
 * address ranges claimed for device-dax hmem; its root spans the whole
 * physical address space ([0, -1]).
 */
static struct resource hmem_active = {
	.name = "HMEM devices",
	.start = 0,
	.end = -1,
	.flags = IORESOURCE_MEM,
};
/**
 * walk_hmem_resources - invoke @fn on each registered hmem range
 * @host: device context forwarded verbatim to @fn
 * @fn: callback receiving (@host, target_nid, resource)
 *
 * The target node id handed to @fn is the value stashed in res->desc at
 * registration time. The walk stops at the first callback returning
 * non-zero and that value is propagated; otherwise returns 0. Holds
 * hmem_resource_lock for the duration of the walk.
 */
int walk_hmem_resources(struct device *host, walk_hmem_fn fn)
{
	struct resource *child;
	int rc = 0;

	mutex_lock(&hmem_resource_lock);
	child = hmem_active.child;
	while (child) {
		rc = fn(host, (int) child->desc, child);
		if (rc)
			break;
		child = child->sibling;
	}
	mutex_unlock(&hmem_resource_lock);

	return rc;
}
EXPORT_SYMBOL_GPL(walk_hmem_resources);
/*
 * Claim @res in the private hmem_active tree and make sure the singleton
 * "hmem_platform" device exists to consume the accumulated ranges.
 * @target_nid is recorded in the new resource's ->desc field for later
 * retrieval by walk_hmem_resources(). Caller holds hmem_resource_lock.
 */
static void __hmem_register_resource(int target_nid, struct resource *res)
{
	struct platform_device *pdev;
	struct resource *new;
	int rc;

	/* A request_region() conflict means the range was already claimed. */
	new = __request_region(&hmem_active, res->start, resource_size(res), "",
			       0);
	if (!new) {
		pr_debug("hmem range %pr already active\n", res);
		return;
	}

	/* Stash the target node for walk_hmem_resources() callbacks. */
	new->desc = target_nid;

	/* Only one hmem_platform device is ever registered. */
	if (platform_initialized)
		return;

	pdev = platform_device_alloc("hmem_platform", 0);
	if (!pdev) {
		pr_err_once("failed to register device-dax hmem_platform device\n");
		return;
	}

	rc = platform_device_add(pdev);
	if (rc)
		platform_device_put(pdev);
	else
		platform_initialized = true;
}
/*
 * Record an hmem address range under the resource lock via
 * __hmem_register_resource(); a no-op when the driver is disabled
 * through the "disable" module parameter.
 */
void hmem_register_resource(int target_nid, struct resource *res)
{
	if (!nohmem) {
		mutex_lock(&hmem_resource_lock);
		__hmem_register_resource(target_nid, res);
		mutex_unlock(&hmem_resource_lock);
	}
}
/*
 * walk_soft_reserve_res() callback: register one Soft Reserved range,
 * resolving its target node from the range's start address. Always
 * returns 0 so the walk visits every range.
 */
static __init int hmem_register_one(struct resource *res, void *data)
{
	hmem_register_resource(phys_to_target_node(res->start), res);
	return 0;
}
/*
 * Walk every Soft Reserved range in the full physical address span
 * ([0, -1]) and register each one as a candidate hmem device.
 */
static __init int hmem_init(void)
{
	walk_soft_reserve_res(0, -1, NULL, hmem_register_one);
	return 0;
}
/*
* As this is a fallback for address ranges unclaimed by the ACPI HMAT
* parsing it must be at an initcall level greater than hmat_init().
*/
device_initcall(hmem_init);