mirror of
https://github.com/torvalds/linux.git
synced 2026-03-07 23:04:33 +01:00
This reintroduces a concept removed by: commit d6cb41cc44 ("mm, hugetlb:
remove hugepages_treat_as_movable sysctl")
This sysctl provides flexibility between ZONE_MOVABLE use cases:
1) onlining memory in ZONE_MOVABLE to maintain hotplug compatibility
2) onlining memory in ZONE_MOVABLE to make hugepage allocation reliable
When ZONE_MOVABLE is used to make huge page allocation more reliable,
disallowing gigantic pages in this region is pointless. If hotplug
is not a requirement, we can loosen the restrictions to allow 1GB gigantic
pages in ZONE_MOVABLE.
Since 1GB pages can be difficult to migrate / have impacts on compaction /
defragmentation, we don't enable this by default. Notably, 1GB pages can
only be migrated if another 1GB page is available - so hot-unplug will
fail if such a page cannot be found.
However, since there are scenarios where gigantic pages are migratable, we
should allow use of these on movable regions.
When no valid 1GB page is available for migration, hot-unplug will retry
indefinitely (or until interrupted). For example:
echo 0 > node0/hugepages/..-1GB/nr_hugepages # clear node0 1GB pages
echo 1 > node1/hugepages/..-1GB/nr_hugepages # reserve node1 1GB page
./alloc_huge_node1 & # Allocate a 1GB page on node1
./node1_offline & # attempt to offline all node1 memory
echo 1 > node0/hugepages/..-1GB/nr_hugepages # reserve node0 1GB page
In this example, node1_offline will block indefinitely until the final
step, when a node0 1GB page is made available.
Note: Boot-time CMA is not possible for driver-managed hotplug memory, as
CMA requires the memory to be registered as SystemRAM at boot time.
Additionally, 1GB huge pages are not supported by THP.
Link: https://lkml.kernel.org/r/20251221125603.2364174-1-gourry@gourry.net
Signed-off-by: Gregory Price <gourry@gourry.net>
Suggested-by: David Rientjes <rientjes@google.com>
Link: https://lore.kernel.org/all/20180201193132.Hk7vI_xaU%25akpm@linux-foundation.org/
Acked-by: David Hildenbrand (Red Hat) <david@kernel.org>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "David Hildenbrand (Red Hat)" <david@kernel.org>
Cc: Gregory Price <gourry@gourry.net>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
145 lines
3.3 KiB
C
145 lines
3.3 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* HugeTLB sysctl interfaces.
|
|
* (C) Nadia Yvette Chambers, April 2004
|
|
*/
|
|
|
|
#include <linux/sysctl.h>
|
|
|
|
#include "hugetlb_internal.h"
|
|
|
|
/*
 * Backing storage for the "movable_gigantic_pages" entry of hugetlb_table
 * (that entry is only built when CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION is
 * set). Has static storage duration with no initializer, so it starts at 0.
 */
int movable_gigantic_pages;
|
|
|
|
#ifdef CONFIG_SYSCTL
|
|
static int proc_hugetlb_doulongvec_minmax(const struct ctl_table *table, int write,
|
|
void *buffer, size_t *length,
|
|
loff_t *ppos, unsigned long *out)
|
|
{
|
|
struct ctl_table dup_table;
|
|
|
|
/*
|
|
* In order to avoid races with __do_proc_doulongvec_minmax(), we
|
|
* can duplicate the @table and alter the duplicate of it.
|
|
*/
|
|
dup_table = *table;
|
|
dup_table.data = out;
|
|
|
|
return proc_doulongvec_minmax(&dup_table, write, buffer, length, ppos);
|
|
}
|
|
|
|
/*
 * Shared implementation of the nr_hugepages style sysctl handlers, acting
 * on default_hstate.
 *
 * The working value is seeded with the hstate's current max_huge_pages and
 * handed to proc_hugetlb_doulongvec_minmax(). On a write that parsed
 * successfully, the resulting value is forwarded to
 * __nr_hugepages_store_common() with NUMA_NO_NODE, and that call's result
 * is returned.
 *
 * @obey_mempolicy: passed through unchanged to __nr_hugepages_store_common().
 *
 * Returns -EOPNOTSUPP when hugepages_supported() is false, otherwise the
 * result of the parse step or of the store step as described above.
 */
static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
					 const struct ctl_table *table, int write,
					 void *buffer, size_t *length, loff_t *ppos)
{
	struct hstate *hs = &default_hstate;
	unsigned long count = hs->max_huge_pages;
	int err;

	if (!hugepages_supported())
		return -EOPNOTSUPP;

	err = proc_hugetlb_doulongvec_minmax(table, write, buffer, length, ppos,
					     &count);
	/* Nothing more to do on a parse failure or on a plain read. */
	if (err || !write)
		return err;

	err = __nr_hugepages_store_common(obey_mempolicy, hs,
					  NUMA_NO_NODE, count, *length);
	return err;
}
|
|
|
|
/*
 * proc handler for the "nr_hugepages" entry: the common handler invoked
 * with obey_mempolicy == false.
 */
static int hugetlb_sysctl_handler(const struct ctl_table *table, int write,
				  void *buffer, size_t *length, loff_t *ppos)
{
	const bool obey_mempolicy = false;

	return hugetlb_sysctl_handler_common(obey_mempolicy, table, write,
					     buffer, length, ppos);
}
|
|
|
|
#ifdef CONFIG_NUMA
|
|
/*
 * proc handler for the "nr_hugepages_mempolicy" entry: the common handler
 * invoked with obey_mempolicy == true.
 */
static int hugetlb_mempolicy_sysctl_handler(const struct ctl_table *table, int write,
					    void *buffer, size_t *length, loff_t *ppos)
{
	const bool obey_mempolicy = true;

	return hugetlb_sysctl_handler_common(obey_mempolicy, table, write,
					     buffer, length, ppos);
}
|
|
#endif /* CONFIG_NUMA */
|
|
|
|
/*
 * proc handler for the "nr_overcommit_hugepages" entry, acting on
 * default_hstate.
 *
 * The working value is seeded from the hstate's nr_overcommit_huge_pages
 * and handed to proc_hugetlb_doulongvec_minmax(). After a successful
 * write, the parsed value is stored back under hugetlb_lock.
 *
 * Returns -EOPNOTSUPP when hugepages_supported() is false, -EINVAL for a
 * write when hstate_is_gigantic_no_runtime() is true for the hstate, and
 * otherwise the result of proc_hugetlb_doulongvec_minmax().
 */
static int hugetlb_overcommit_handler(const struct ctl_table *table, int write,
				      void *buffer, size_t *length, loff_t *ppos)
{
	struct hstate *hs = &default_hstate;
	unsigned long overcommit;
	int err;

	if (!hugepages_supported())
		return -EOPNOTSUPP;

	overcommit = hs->nr_overcommit_huge_pages;

	if (write && hstate_is_gigantic_no_runtime(hs))
		return -EINVAL;

	err = proc_hugetlb_doulongvec_minmax(table, write, buffer, length, ppos,
					     &overcommit);
	/* Reads and failed parses are finished at this point. */
	if (err || !write)
		return err;

	spin_lock_irq(&hugetlb_lock);
	hs->nr_overcommit_huge_pages = overcommit;
	spin_unlock_irq(&hugetlb_lock);

	return 0;
}
|
|
|
|
static const struct ctl_table hugetlb_table[] = {
|
|
{
|
|
.procname = "nr_hugepages",
|
|
.data = NULL,
|
|
.maxlen = sizeof(unsigned long),
|
|
.mode = 0644,
|
|
.proc_handler = hugetlb_sysctl_handler,
|
|
},
|
|
#ifdef CONFIG_NUMA
|
|
{
|
|
.procname = "nr_hugepages_mempolicy",
|
|
.data = NULL,
|
|
.maxlen = sizeof(unsigned long),
|
|
.mode = 0644,
|
|
.proc_handler = &hugetlb_mempolicy_sysctl_handler,
|
|
},
|
|
#endif
|
|
{
|
|
.procname = "hugetlb_shm_group",
|
|
.data = &sysctl_hugetlb_shm_group,
|
|
.maxlen = sizeof(gid_t),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec,
|
|
},
|
|
{
|
|
.procname = "nr_overcommit_hugepages",
|
|
.data = NULL,
|
|
.maxlen = sizeof(unsigned long),
|
|
.mode = 0644,
|
|
.proc_handler = hugetlb_overcommit_handler,
|
|
},
|
|
#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
|
|
{
|
|
.procname = "movable_gigantic_pages",
|
|
.data = &movable_gigantic_pages,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec,
|
|
},
|
|
#endif
|
|
};
|
|
|
|
/*
 * Register hugetlb_table under the "vm" sysctl directory.
 * Only built when CONFIG_SYSCTL is enabled.
 */
void __init hugetlb_sysctl_init(void)
{
	register_sysctl_init("vm", hugetlb_table);
}
|
|
#endif /* CONFIG_SYSCTL */
|