cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset

Until now, HK_TYPE_DOMAIN only included the boot-defined isolated CPUs
passed through the isolcpus= boot option. Users who also need to know
about the runtime-defined isolated CPUs set through cpuset must use
different APIs: cpuset_cpu_is_isolated(), cpu_is_isolated(), etc.

There are many drawbacks to that approach:

1) Most interested subsystems want to know about all isolated CPUs, not
  just those defined at boot time.

2) cpuset_cpu_is_isolated() / cpu_is_isolated() are not synchronized with
  concurrent cpuset changes.

3) Further cpuset modifications are not propagated to subsystems.

Solve 1) and 2) by centralizing all isolated CPUs within the
HK_TYPE_DOMAIN housekeeping cpumask.

Subsystems can rely on RCU to synchronize against concurrent changes.

The propagation mentioned in 3) will be handled in further patches.

[Chen Ridong: Fix cpu_hotplug_lock deadlock and use correct static
branch API]

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Waiman Long <longman@redhat.com>
Reviewed-by: Chen Ridong <chenridong@huawei.com>
Signed-off-by: Chen Ridong <chenridong@huawei.com>
Cc: "Michal Koutný" <mkoutny@suse.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Marco Crivellari <marco.crivellari@suse.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Waiman Long <longman@redhat.com>
Cc: cgroups@vger.kernel.org
This commit is contained in:
Frederic Weisbecker 2025-05-28 18:05:32 +02:00
parent 27c3a5967f
commit 03ff735101
4 changed files with 80 additions and 8 deletions

View file

@ -9,6 +9,11 @@
enum hk_type {
/* Inverse of boot-time isolcpus= argument */
HK_TYPE_DOMAIN_BOOT,
/*
* Same as HK_TYPE_DOMAIN_BOOT but also includes the
* inverse of cpuset isolated partitions. As such it
* is always a subset of HK_TYPE_DOMAIN_BOOT.
*/
HK_TYPE_DOMAIN,
/* Inverse of boot-time isolcpus=managed_irq argument */
HK_TYPE_MANAGED_IRQ,
@ -35,6 +40,7 @@ extern const struct cpumask *housekeeping_cpumask(enum hk_type type);
extern bool housekeeping_enabled(enum hk_type type);
extern void housekeeping_affine(struct task_struct *t, enum hk_type type);
extern bool housekeeping_test_cpu(int cpu, enum hk_type type);
extern int housekeeping_update(struct cpumask *isol_mask);
extern void __init housekeeping_init(void);
#else
@ -62,6 +68,7 @@ static inline bool housekeeping_test_cpu(int cpu, enum hk_type type)
return true;
}
/* Stub for builds without CONFIG_CPU_ISOLATION: nothing to update, always returns 0. */
static inline int housekeeping_update(struct cpumask *isol_mask) { return 0; }
static inline void housekeeping_init(void) { }
#endif /* CONFIG_CPU_ISOLATION */

View file

@ -1482,14 +1482,15 @@ static void update_isolation_cpumasks(void)
if (!isolated_cpus_updating)
return;
lockdep_assert_cpus_held();
ret = workqueue_unbound_exclude_cpumask(isolated_cpus);
WARN_ON_ONCE(ret < 0);
ret = tmigr_isolated_exclude_cpumask(isolated_cpus);
WARN_ON_ONCE(ret < 0);
ret = housekeeping_update(isolated_cpus);
WARN_ON_ONCE(ret < 0);
isolated_cpus_updating = false;
}

View file

@ -29,18 +29,48 @@ static struct housekeeping housekeeping;
/*
 * housekeeping_enabled - test whether @type's bit is set in housekeeping.flags.
 *
 * Returns true if BIT(@type) is set in the flags word, false otherwise.
 *
 * NOTE(review): two return statements appear back to back below. This looks
 * like a diff rendering with the +/- markers stripped, i.e. the plain read
 * would be the removed line and the READ_ONCE() read the added one -- confirm
 * against the applied tree.
 */
bool housekeeping_enabled(enum hk_type type)
{
return !!(housekeeping.flags & BIT(type));
return !!(READ_ONCE(housekeeping.flags) & BIT(type));
}
EXPORT_SYMBOL_GPL(housekeeping_enabled);
/*
 * Extra condition handed to rcu_dereference_all_check() when a housekeeping
 * cpumask pointer is dereferenced (see housekeeping_cpumask_dereference()).
 *
 * Only HK_TYPE_DOMAIN is subject to a real check, and only when
 * CONFIG_LOCKDEP is enabled; every other case reports true.
 */
static bool housekeeping_dereference_check(enum hk_type type)
{
	/* Nothing to verify without lockdep or for any other type */
	if (!IS_ENABLED(CONFIG_LOCKDEP) || type != HK_TYPE_DOMAIN)
		return true;

	/* Cpuset isn't even writable yet? */
	if (system_state <= SYSTEM_SCHEDULING)
		return true;

	/* CPU hotplug write locked, so cpuset partition can't be overwritten */
	if (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_write_held())
		return true;

	/* Cpuset lock held, partitions not writable */
	return IS_ENABLED(CONFIG_CPUSETS) && lockdep_is_cpuset_held();
}
/*
 * Load the cpumask pointer stored for @type in housekeeping.cpumasks[],
 * using housekeeping_dereference_check() as the condition argument of
 * rcu_dereference_all_check().
 */
static inline struct cpumask *housekeeping_cpumask_dereference(enum hk_type type)
{
	struct cpumask *hk_mask;

	hk_mask = rcu_dereference_all_check(housekeeping.cpumasks[type],
					    housekeeping_dereference_check(type));

	return hk_mask;
}
/*
 * housekeeping_cpumask - return the cpumask associated with housekeeping @type.
 *
 * NOTE(review): this body appears to interleave pre- and post-patch lines
 * (diff rendering with the +/- markers stripped). The block returning
 * rcu_dereference_check(..., 1) and the bare "return cpu_possible_mask;"
 * look like the removed lines -- confirm against the applied tree.
 *
 * Reading only the READ_ONCE() / housekeeping_cpumask_dereference() lines:
 * the mask is looked up only when the housekeeping_overridden static branch
 * is on and BIT(@type) is set in housekeeping.flags; if no mask was obtained
 * (pointer still NULL), cpu_possible_mask is returned instead.
 */
const struct cpumask *housekeeping_cpumask(enum hk_type type)
{
const struct cpumask *mask = NULL;
if (static_branch_unlikely(&housekeeping_overridden)) {
if (housekeeping.flags & BIT(type)) {
return rcu_dereference_check(housekeeping.cpumasks[type], 1);
}
/* Dereference the per-type mask only if its flag bit is set */
if (READ_ONCE(housekeeping.flags) & BIT(type))
mask = housekeeping_cpumask_dereference(type);
}
return cpu_possible_mask;
/* Nothing was picked up above: fall back to cpu_possible_mask */
if (!mask)
mask = cpu_possible_mask;
return mask;
}
EXPORT_SYMBOL_GPL(housekeeping_cpumask);
@ -80,12 +110,45 @@ EXPORT_SYMBOL_GPL(housekeeping_affine);
/*
 * housekeeping_test_cpu - test whether @cpu is set in the cpumask of @type.
 *
 * The mask returned by housekeeping_cpumask(@type) is consulted only when the
 * housekeeping_overridden static branch is on and BIT(@type) is set in
 * housekeeping.flags; in every other case the function returns true.
 *
 * NOTE(review): the condition appears twice below, once with a plain read of
 * housekeeping.flags and once with READ_ONCE(). This looks like a diff
 * rendering with the +/- markers stripped (removed line followed by its
 * replacement) -- confirm against the applied tree.
 */
bool housekeeping_test_cpu(int cpu, enum hk_type type)
{
if (static_branch_unlikely(&housekeeping_overridden) && housekeeping.flags & BIT(type))
if (static_branch_unlikely(&housekeeping_overridden) &&
READ_ONCE(housekeeping.flags) & BIT(type))
return cpumask_test_cpu(cpu, housekeeping_cpumask(type));
return true;
}
EXPORT_SYMBOL_GPL(housekeeping_test_cpu);
/*
 * housekeeping_update - replace the HK_TYPE_DOMAIN cpumask.
 * @isol_mask: CPUs to remove from the HK_TYPE_DOMAIN_BOOT mask
 *
 * Builds a new mask equal to the HK_TYPE_DOMAIN_BOOT mask minus @isol_mask
 * and installs it as the HK_TYPE_DOMAIN mask with rcu_assign_pointer().
 * A previously installed HK_TYPE_DOMAIN mask, if any, is freed after
 * synchronize_rcu().
 *
 * The caller must hold the CPU hotplug lock (lockdep_assert_cpus_held()).
 * The function allocates with GFP_KERNEL and calls synchronize_rcu().
 *
 * Return: 0 on success, -ENOMEM if the new mask cannot be allocated,
 * -EINVAL if the resulting mask contains no online CPU.
 */
int housekeeping_update(struct cpumask *isol_mask)
{
struct cpumask *trial, *old = NULL;
lockdep_assert_cpus_held();
trial = kmalloc(cpumask_size(), GFP_KERNEL);
if (!trial)
return -ENOMEM;
/* trial = HK_TYPE_DOMAIN_BOOT mask with the CPUs of @isol_mask cleared */
cpumask_andnot(trial, housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT), isol_mask);
/* Reject a result that would leave no online CPU in the mask */
if (!cpumask_intersects(trial, cpu_online_mask)) {
kfree(trial);
return -EINVAL;
}
/*
 * No flag set at all so far: turn on the static branch that the lookup
 * helpers test. The _cpuslocked variant is used; the CPU hotplug lock is
 * asserted held at the top of this function.
 */
if (!housekeeping.flags)
static_branch_enable_cpuslocked(&housekeeping_overridden);
/*
 * If a HK_TYPE_DOMAIN mask is already flagged, remember it so it can be
 * freed below; otherwise set the flag for the first time.
 */
if (housekeeping.flags & HK_FLAG_DOMAIN)
old = housekeeping_cpumask_dereference(HK_TYPE_DOMAIN);
else
WRITE_ONCE(housekeeping.flags, housekeeping.flags | HK_FLAG_DOMAIN);
/* Publish the new mask */
rcu_assign_pointer(housekeeping.cpumasks[HK_TYPE_DOMAIN], trial);
/* Wait for pre-existing RCU readers before freeing the previous mask */
synchronize_rcu();
/* old is NULL when there was no previous mask; kfree(NULL) is a no-op */
kfree(old);
return 0;
}
void __init housekeeping_init(void)
{
enum hk_type type;

View file

@ -30,6 +30,7 @@
#include <linux/context_tracking.h>
#include <linux/cpufreq.h>
#include <linux/cpumask_api.h>
#include <linux/cpuset.h>
#include <linux/ctype.h>
#include <linux/file.h>
#include <linux/fs_api.h>