mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 01:04:41 +01:00
The kthread code provides an infrastructure which manages the preferred
affinity of unbound kthreads (node or custom cpumask) against housekeeping (CPU isolation) constraints and CPU hotplug events. One crucial missing piece is the handling of cpuset: when an isolated partition is created, deleted, or its CPUs updated, all the unbound kthreads in the top cpuset become indifferently affine to _all_ the non-isolated CPUs, possibly breaking their preferred affinity along the way. Solve this with performing the kthreads affinity update from cpuset to the kthreads consolidated relevant code instead so that preferred affinities are honoured and applied against the updated cpuset isolated partitions. The dispatch of the new isolated cpumasks to timers, workqueues and kthreads is performed by housekeeping, as per the nice Tejun's suggestion. As a welcome side effect, HK_TYPE_DOMAIN then integrates both the set from boot defined domain isolation (through isolcpus=) and cpuset isolated partitions. Housekeeping cpumasks are now modifiable with a specific RCU based synchronization. A big step toward making nohz_full= also mutable through cpuset in the future. 
-----BEGIN PGP SIGNATURE----- iQJPBAABCAA5FiEEd76+gtGM8MbftQlOhSRUR1COjHcFAmmE0mYbFIAAAAAABAAO bWFudTIsMi41KzEuMTEsMiwyAAoJEIUkVEdQjox36eMP/0Ls/ArfYVi/MNAXWlpy rAt6m9Y/X9GBcDM/VI9BXq1ZX4qEr2XjJ8UUb8cM08uHEAt0ErlmpRxREwJFrKbI H4jzg5EwO0D0c6MnvgQJEAwkHxQVIjsxG9DovRIjxyW4ycx3aSsRg/f2VKyWoLvY 7ZT7CbLFE+I/MQh2ZgUu/9pnCDQVR2anss2WYIej5mmgFL5pyEv3YvYgKYVyK08z sXyNxpP976g2d9ECJ9OtFJV9we6mlqxlG0MVCiv/Uxh7DBjxWWPsLvlmLAXggQ03 +0GW+nnutDaKz83pgS7Z4zum/+Oa+I1dTLIN27pARUNcMCYip7njM2KNpJwPdov3 +fAIODH2JVX1xewT+U1cCq6gdI55ejbwdQYGFV075dKBUxKQeIyrghvfC3Ga6aKQ Gw3y68jdrXOw6iyfHR5k/0Mnu2/FDKUW2fZxLKm55PvNZP5jQFmSlz9wyiwwyb3m UUSgThj6Ozodxks8hDX41rGVezCcm1ni+qNSiNIs8HPaaZQrwbnvKHQFBBJHQzJP rJ39VWBx3Hq/ly71BOR6pCzoZsfS1f85YKhJ4vsfjLO6BfhI16nBat89eROSRKcz XptyWqW0PgAD0teDuMCTPNuUym/viBHALXHKuSO12CIizacvftiGcmaQNPlLiiFZ /Dr2+aOhwYw3UD6djn3u94M9 =nWGh -----END PGP SIGNATURE----- Merge tag 'kthread-for-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks Pull kthread updates from Frederic Weisbecker: "The kthread code provides an infrastructure which manages the preferred affinity of unbound kthreads (node or custom cpumask) against housekeeping (CPU isolation) constraints and CPU hotplug events. One crucial missing piece is the handling of cpuset: when an isolated partition is created, deleted, or its CPUs updated, all the unbound kthreads in the top cpuset become indifferently affine to _all_ the non-isolated CPUs, possibly breaking their preferred affinity along the way. Solve this with performing the kthreads affinity update from cpuset to the kthreads consolidated relevant code instead so that preferred affinities are honoured and applied against the updated cpuset isolated partitions. The dispatch of the new isolated cpumasks to timers, workqueues and kthreads is performed by housekeeping, as per the nice Tejun's suggestion. 
As a welcome side effect, HK_TYPE_DOMAIN then integrates both the set from boot defined domain isolation (through isolcpus=) and cpuset isolated partitions. Housekeeping cpumasks are now modifiable with a specific RCU based synchronization. A big step toward making nohz_full= also mutable through cpuset in the future" * tag 'kthread-for-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks: (33 commits) doc: Add housekeeping documentation kthread: Document kthread_affine_preferred() kthread: Comment on the purpose and placement of kthread_affine_node() call kthread: Honour kthreads preferred affinity after cpuset changes sched/arm64: Move fallback task cpumask to HK_TYPE_DOMAIN sched: Switch the fallback task allowed cpumask to HK_TYPE_DOMAIN kthread: Rely on HK_TYPE_DOMAIN for preferred affinity management kthread: Include kthreadd to the managed affinity list kthread: Include unbound kthreads in the managed affinity list kthread: Refine naming of affinity related fields PCI: Remove superfluous HK_TYPE_WQ check sched/isolation: Remove HK_TYPE_TICK test from cpu_is_isolated() cpuset: Remove cpuset_cpu_is_isolated() timers/migration: Remove superfluous cpuset isolation test cpuset: Propagate cpuset isolation update to timers through housekeeping cpuset: Propagate cpuset isolation update to workqueue through housekeeping PCI: Flush PCI probe workqueue on cpuset isolated partition change sched/isolation: Flush vmstat workqueues on cpuset isolated partition change sched/isolation: Flush memcg workqueues on cpuset isolated partition change cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset ...
This commit is contained in:
commit
d16738a4e7
28 changed files with 554 additions and 222 deletions
|
|
@ -154,10 +154,14 @@ mode will return to host userspace with an ``exit_reason`` of
|
|||
``KVM_EXIT_FAIL_ENTRY`` and will remain non-runnable until successfully
|
||||
re-initialised by a subsequent ``KVM_ARM_VCPU_INIT`` operation.
|
||||
|
||||
NOHZ FULL
|
||||
---------
|
||||
SCHEDULER DOMAIN ISOLATION
|
||||
--------------------------
|
||||
|
||||
To avoid perturbing an adaptive-ticks CPU (specified using
|
||||
``nohz_full=``) when a 32-bit task is forcefully migrated, these CPUs
|
||||
To avoid perturbing a boot-defined domain isolated CPU (specified using
|
||||
``isolcpus=[domain]``) when a 32-bit task is forcefully migrated, these CPUs
|
||||
are treated as 64-bit-only when support for asymmetric 32-bit systems
|
||||
is enabled.
|
||||
|
||||
However as opposed to boot-defined domain isolation, runtime-defined domain
|
||||
isolation using cpuset isolated partition is not advised on asymmetric
|
||||
32-bit systems and will result in undefined behaviour.
|
||||
|
|
|
|||
111
Documentation/core-api/housekeeping.rst
Normal file
111
Documentation/core-api/housekeeping.rst
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
======================================
|
||||
Housekeeping
|
||||
======================================
|
||||
|
||||
|
||||
CPU Isolation moves away kernel work that may otherwise run on any CPU.
|
||||
The purpose of its related features is to reduce the OS jitter that some
|
||||
extreme workloads can't stand, such as in some DPDK use cases.
|
||||
|
||||
The kernel work moved away by CPU isolation is commonly described as
|
||||
"housekeeping" because it includes ground work that performs cleanups,
|
||||
statistics maintenance and actions relying on them, memory release,
|
||||
various deferrals etc...
|
||||
|
||||
Sometimes housekeeping is just some unbound work (unbound workqueues,
|
||||
unbound timers, ...) that gets easily assigned to non-isolated CPUs.
|
||||
But sometimes housekeeping is tied to a specific CPU and requires
|
||||
elaborate tricks to be offloaded to non-isolated CPUs (RCU_NOCB, remote
|
||||
scheduler tick, etc...).
|
||||
|
||||
Thus, a housekeeping CPU can be considered as the reverse of an isolated
|
||||
CPU. It is simply a CPU that can execute housekeeping work. There must
|
||||
always be at least one online housekeeping CPU at any time. The CPUs that
|
||||
are not isolated are automatically assigned as housekeeping.
|
||||
|
||||
Housekeeping is currently divided into four features described
|
||||
by the ``enum hk_type type``:
|
||||
|
||||
1. HK_TYPE_DOMAIN matches the work moved away by scheduler domain
|
||||
isolation performed through ``isolcpus=domain`` boot parameter or
|
||||
isolated cpuset partitions in cgroup v2. This includes scheduler
|
||||
load balancing, unbound workqueues and timers.
|
||||
|
||||
2. HK_TYPE_KERNEL_NOISE matches the work moved away by tick isolation
|
||||
performed through ``nohz_full=`` or ``isolcpus=nohz`` boot
|
||||
parameters. This includes remote scheduler tick, vmstat and lockup
|
||||
watchdog.
|
||||
|
||||
3. HK_TYPE_MANAGED_IRQ matches the IRQ handlers moved away by managed
|
||||
IRQ isolation performed through ``isolcpus=managed_irq``.
|
||||
|
||||
4. HK_TYPE_DOMAIN_BOOT matches the work moved away by scheduler domain
|
||||
isolation performed through ``isolcpus=domain`` only. It is similar
|
||||
to HK_TYPE_DOMAIN except it ignores the isolation performed by
|
||||
cpusets.
|
||||
|
||||
|
||||
Housekeeping cpumasks
|
||||
=================================
|
||||
|
||||
Housekeeping cpumasks include the CPUs that can execute the work moved
|
||||
away by the matching isolation feature. These cpumasks are returned by
|
||||
the following function::
|
||||
|
||||
const struct cpumask *housekeeping_cpumask(enum hk_type type)
|
||||
|
||||
By default, if neither ``nohz_full=``, nor ``isolcpus``, nor cpuset's
|
||||
isolated partitions are used, which covers most use cases, this function
|
||||
returns the cpu_possible_mask.
|
||||
|
||||
Otherwise the function returns the cpumask complement of the isolation
|
||||
feature. For example:
|
||||
|
||||
With isolcpus=domain,7 the following will return a mask with all possible
|
||||
CPUs except 7::
|
||||
|
||||
housekeeping_cpumask(HK_TYPE_DOMAIN)
|
||||
|
||||
Similarly with nohz_full=5,6 the following will return a mask with all
|
||||
possible CPUs except 5,6::
|
||||
|
||||
housekeeping_cpumask(HK_TYPE_KERNEL_NOISE)
|
||||
|
||||
|
||||
Synchronization against cpusets
|
||||
=================================
|
||||
|
||||
Cpuset can modify the HK_TYPE_DOMAIN housekeeping cpumask while creating,
|
||||
modifying or deleting an isolated partition.
|
||||
|
||||
The users of HK_TYPE_DOMAIN cpumask must then make sure to synchronize
|
||||
properly against cpuset in order to make sure that:
|
||||
|
||||
1. The cpumask snapshot stays coherent.
|
||||
|
||||
2. No housekeeping work is queued on a newly made isolated CPU.
|
||||
|
||||
3. Pending housekeeping work that was queued to a non-isolated
|
||||
CPU which just turned isolated through cpuset must be flushed
|
||||
before the related created/modified isolated partition is made
|
||||
available to userspace.
|
||||
|
||||
This synchronization is maintained by an RCU based scheme. The cpuset update
|
||||
side waits for an RCU grace period after updating the HK_TYPE_DOMAIN
|
||||
cpumask and before flushing pending works. On the read side, care must be
|
||||
taken to gather the housekeeping target election and the work enqueue within
|
||||
the same RCU read side critical section.
|
||||
|
||||
A typical layout example would look like this on the update side
|
||||
(``housekeeping_update()``)::
|
||||
|
||||
rcu_assign_pointer(housekeeping_cpumasks[type], trial);
|
||||
synchronize_rcu();
|
||||
flush_workqueue(example_workqueue);
|
||||
|
||||
And then on the read side::
|
||||
|
||||
rcu_read_lock();
|
||||
cpu = housekeeping_any_cpu(HK_TYPE_DOMAIN);
|
||||
queue_work_on(cpu, example_workqueue, work);
|
||||
rcu_read_unlock();
|
||||
|
|
@ -25,6 +25,7 @@ it.
|
|||
symbol-namespaces
|
||||
asm-annotations
|
||||
real-time/index
|
||||
housekeeping.rst
|
||||
|
||||
Data structures and low-level utilities
|
||||
=======================================
|
||||
|
|
|
|||
|
|
@ -1669,7 +1669,7 @@ const struct cpumask *system_32bit_el0_cpumask(void)
|
|||
|
||||
const struct cpumask *task_cpu_fallback_mask(struct task_struct *p)
|
||||
{
|
||||
return __task_cpu_possible_mask(p, housekeeping_cpumask(HK_TYPE_TICK));
|
||||
return __task_cpu_possible_mask(p, housekeeping_cpumask(HK_TYPE_DOMAIN));
|
||||
}
|
||||
|
||||
static int __init parse_32bit_el0_param(char *str)
|
||||
|
|
@ -3987,8 +3987,8 @@ static int enable_mismatched_32bit_el0(unsigned int cpu)
|
|||
bool cpu_32bit = false;
|
||||
|
||||
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
|
||||
if (!housekeeping_cpu(cpu, HK_TYPE_TICK))
|
||||
pr_info("Treating adaptive-ticks CPU %u as 64-bit only\n", cpu);
|
||||
if (!housekeeping_cpu(cpu, HK_TYPE_DOMAIN))
|
||||
pr_info("Treating domain isolated CPU %u as 64-bit only\n", cpu);
|
||||
else
|
||||
cpu_32bit = true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4270,12 +4270,16 @@ static void blk_mq_map_swqueue(struct request_queue *q)
|
|||
|
||||
/*
|
||||
* Rule out isolated CPUs from hctx->cpumask to avoid
|
||||
* running block kworker on isolated CPUs
|
||||
* running block kworker on isolated CPUs.
|
||||
* FIXME: cpuset should propagate further changes to isolated CPUs
|
||||
* here.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
for_each_cpu(cpu, hctx->cpumask) {
|
||||
if (cpu_is_isolated(cpu))
|
||||
cpumask_clear_cpu(cpu, hctx->cpumask);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
/*
|
||||
* Initialize batch roundrobin counts
|
||||
|
|
|
|||
|
|
@ -291,7 +291,7 @@ static ssize_t print_cpus_isolated(struct device *dev,
|
|||
return -ENOMEM;
|
||||
|
||||
cpumask_andnot(isolated, cpu_possible_mask,
|
||||
housekeeping_cpumask(HK_TYPE_DOMAIN));
|
||||
housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT));
|
||||
len = sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(isolated));
|
||||
|
||||
free_cpumask_var(isolated);
|
||||
|
|
|
|||
|
|
@ -302,9 +302,8 @@ struct drv_dev_and_id {
|
|||
const struct pci_device_id *id;
|
||||
};
|
||||
|
||||
static long local_pci_probe(void *_ddi)
|
||||
static int local_pci_probe(struct drv_dev_and_id *ddi)
|
||||
{
|
||||
struct drv_dev_and_id *ddi = _ddi;
|
||||
struct pci_dev *pci_dev = ddi->dev;
|
||||
struct pci_driver *pci_drv = ddi->drv;
|
||||
struct device *dev = &pci_dev->dev;
|
||||
|
|
@ -338,6 +337,21 @@ static long local_pci_probe(void *_ddi)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static struct workqueue_struct *pci_probe_wq;
|
||||
|
||||
struct pci_probe_arg {
|
||||
struct drv_dev_and_id *ddi;
|
||||
struct work_struct work;
|
||||
int ret;
|
||||
};
|
||||
|
||||
static void local_pci_probe_callback(struct work_struct *work)
|
||||
{
|
||||
struct pci_probe_arg *arg = container_of(work, struct pci_probe_arg, work);
|
||||
|
||||
arg->ret = local_pci_probe(arg->ddi);
|
||||
}
|
||||
|
||||
static bool pci_physfn_is_probed(struct pci_dev *dev)
|
||||
{
|
||||
#ifdef CONFIG_PCI_IOV
|
||||
|
|
@ -362,40 +376,55 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
|
|||
dev->is_probed = 1;
|
||||
|
||||
cpu_hotplug_disable();
|
||||
|
||||
/*
|
||||
* Prevent nesting work_on_cpu() for the case where a Virtual Function
|
||||
* device is probed from work_on_cpu() of the Physical device.
|
||||
*/
|
||||
if (node < 0 || node >= MAX_NUMNODES || !node_online(node) ||
|
||||
pci_physfn_is_probed(dev)) {
|
||||
cpu = nr_cpu_ids;
|
||||
error = local_pci_probe(&ddi);
|
||||
} else {
|
||||
cpumask_var_t wq_domain_mask;
|
||||
|
||||
if (!zalloc_cpumask_var(&wq_domain_mask, GFP_KERNEL)) {
|
||||
error = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
cpumask_and(wq_domain_mask,
|
||||
housekeeping_cpumask(HK_TYPE_WQ),
|
||||
housekeeping_cpumask(HK_TYPE_DOMAIN));
|
||||
struct pci_probe_arg arg = { .ddi = &ddi };
|
||||
|
||||
INIT_WORK_ONSTACK(&arg.work, local_pci_probe_callback);
|
||||
/*
|
||||
* The target election and the enqueue of the work must be within
|
||||
* the same RCU read side section so that when the workqueue pool
|
||||
* is flushed after a housekeeping cpumask update, further readers
|
||||
* are guaranteed to queue the probing work to the appropriate
|
||||
* targets.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
cpu = cpumask_any_and(cpumask_of_node(node),
|
||||
wq_domain_mask);
|
||||
free_cpumask_var(wq_domain_mask);
|
||||
housekeeping_cpumask(HK_TYPE_DOMAIN));
|
||||
|
||||
if (cpu < nr_cpu_ids) {
|
||||
struct workqueue_struct *wq = pci_probe_wq;
|
||||
|
||||
if (WARN_ON_ONCE(!wq))
|
||||
wq = system_percpu_wq;
|
||||
queue_work_on(cpu, wq, &arg.work);
|
||||
rcu_read_unlock();
|
||||
flush_work(&arg.work);
|
||||
error = arg.ret;
|
||||
} else {
|
||||
rcu_read_unlock();
|
||||
error = local_pci_probe(&ddi);
|
||||
}
|
||||
|
||||
destroy_work_on_stack(&arg.work);
|
||||
}
|
||||
|
||||
if (cpu < nr_cpu_ids)
|
||||
error = work_on_cpu(cpu, local_pci_probe, &ddi);
|
||||
else
|
||||
error = local_pci_probe(&ddi);
|
||||
out:
|
||||
dev->is_probed = 0;
|
||||
cpu_hotplug_enable();
|
||||
return error;
|
||||
}
|
||||
|
||||
void pci_probe_flush_workqueue(void)
|
||||
{
|
||||
flush_workqueue(pci_probe_wq);
|
||||
}
|
||||
|
||||
/**
|
||||
* __pci_device_probe - check if a driver wants to claim a specific PCI device
|
||||
* @drv: driver to call to check if it wants the PCI device
|
||||
|
|
@ -1733,6 +1762,10 @@ static int __init pci_driver_init(void)
|
|||
{
|
||||
int ret;
|
||||
|
||||
pci_probe_wq = alloc_workqueue("sync_wq", WQ_PERCPU, 0);
|
||||
if (!pci_probe_wq)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = bus_register(&pci_bus_type);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@
|
|||
struct device;
|
||||
|
||||
extern int lockdep_is_cpus_held(void);
|
||||
extern int lockdep_is_cpus_write_held(void);
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
void cpus_write_lock(void);
|
||||
|
|
|
|||
|
|
@ -18,6 +18,8 @@
|
|||
#include <linux/mmu_context.h>
|
||||
#include <linux/jump_label.h>
|
||||
|
||||
extern bool lockdep_is_cpuset_held(void);
|
||||
|
||||
#ifdef CONFIG_CPUSETS
|
||||
|
||||
/*
|
||||
|
|
@ -77,7 +79,6 @@ extern void cpuset_unlock(void);
|
|||
extern void cpuset_cpus_allowed_locked(struct task_struct *p, struct cpumask *mask);
|
||||
extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
|
||||
extern bool cpuset_cpus_allowed_fallback(struct task_struct *p);
|
||||
extern bool cpuset_cpu_is_isolated(int cpu);
|
||||
extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
|
||||
#define cpuset_current_mems_allowed (current->mems_allowed)
|
||||
void cpuset_init_current_mems_allowed(void);
|
||||
|
|
@ -213,11 +214,6 @@ static inline bool cpuset_cpus_allowed_fallback(struct task_struct *p)
|
|||
return false;
|
||||
}
|
||||
|
||||
static inline bool cpuset_cpu_is_isolated(int cpu)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
|
||||
{
|
||||
return node_possible_map;
|
||||
|
|
|
|||
|
|
@ -100,6 +100,7 @@ void kthread_unpark(struct task_struct *k);
|
|||
void kthread_parkme(void);
|
||||
void kthread_exit(long result) __noreturn;
|
||||
void kthread_complete_and_exit(struct completion *, long) __noreturn;
|
||||
int kthreads_update_housekeeping(void);
|
||||
|
||||
int kthreadd(void *unused);
|
||||
extern struct task_struct *kthreadd_task;
|
||||
|
|
|
|||
|
|
@ -1037,6 +1037,8 @@ static inline u64 cgroup_id_from_mm(struct mm_struct *mm)
|
|||
return id;
|
||||
}
|
||||
|
||||
void mem_cgroup_flush_workqueue(void);
|
||||
|
||||
extern int mem_cgroup_init(void);
|
||||
#else /* CONFIG_MEMCG */
|
||||
|
||||
|
|
@ -1436,6 +1438,8 @@ static inline u64 cgroup_id_from_mm(struct mm_struct *mm)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static inline void mem_cgroup_flush_workqueue(void) { }
|
||||
|
||||
static inline int mem_cgroup_init(void) { return 0; }
|
||||
#endif /* CONFIG_MEMCG */
|
||||
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ static inline void leave_mm(void) { }
|
|||
#ifndef task_cpu_possible_mask
|
||||
# define task_cpu_possible_mask(p) cpu_possible_mask
|
||||
# define task_cpu_possible(cpu, p) true
|
||||
# define task_cpu_fallback_mask(p) housekeeping_cpumask(HK_TYPE_TICK)
|
||||
# define task_cpu_fallback_mask(p) housekeeping_cpumask(HK_TYPE_DOMAIN)
|
||||
#else
|
||||
# define task_cpu_possible(cpu, p) cpumask_test_cpu((cpu), task_cpu_possible_mask(p))
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1206,6 +1206,7 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
|
|||
struct pci_ops *ops, void *sysdata,
|
||||
struct list_head *resources);
|
||||
int pci_host_probe(struct pci_host_bridge *bridge);
|
||||
void pci_probe_flush_workqueue(void);
|
||||
int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int busmax);
|
||||
int pci_bus_update_busn_res_end(struct pci_bus *b, int busmax);
|
||||
void pci_bus_release_busn_res(struct pci_bus *b);
|
||||
|
|
@ -2079,6 +2080,8 @@ static inline int pci_has_flag(int flag) { return 0; }
|
|||
_PCI_NOP_ALL(read, *)
|
||||
_PCI_NOP_ALL(write,)
|
||||
|
||||
static inline void pci_probe_flush_workqueue(void) { }
|
||||
|
||||
static inline struct pci_dev *pci_get_device(unsigned int vendor,
|
||||
unsigned int device,
|
||||
struct pci_dev *from)
|
||||
|
|
|
|||
|
|
@ -161,6 +161,7 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
|
|||
__percpu_init_rwsem(sem, #sem, &rwsem_key); \
|
||||
})
|
||||
|
||||
#define percpu_rwsem_is_write_held(sem) lockdep_is_held_type(sem, 0)
|
||||
#define percpu_rwsem_is_held(sem) lockdep_is_held(sem)
|
||||
#define percpu_rwsem_assert_held(sem) lockdep_assert_held(sem)
|
||||
|
||||
|
|
|
|||
|
|
@ -2,13 +2,21 @@
|
|||
#define _LINUX_SCHED_ISOLATION_H
|
||||
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/cpuset.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/tick.h>
|
||||
|
||||
enum hk_type {
|
||||
/* Inverse of boot-time isolcpus= argument */
|
||||
HK_TYPE_DOMAIN_BOOT,
|
||||
/*
|
||||
* Same as HK_TYPE_DOMAIN_BOOT but also includes the
|
||||
* inverse of cpuset isolated partitions. As such it
|
||||
* is always a subset of HK_TYPE_DOMAIN_BOOT.
|
||||
*/
|
||||
HK_TYPE_DOMAIN,
|
||||
/* Inverse of boot-time isolcpus=managed_irq argument */
|
||||
HK_TYPE_MANAGED_IRQ,
|
||||
/* Inverse of boot-time nohz_full= or isolcpus=nohz arguments */
|
||||
HK_TYPE_KERNEL_NOISE,
|
||||
HK_TYPE_MAX,
|
||||
|
||||
|
|
@ -31,6 +39,7 @@ extern const struct cpumask *housekeeping_cpumask(enum hk_type type);
|
|||
extern bool housekeeping_enabled(enum hk_type type);
|
||||
extern void housekeeping_affine(struct task_struct *t, enum hk_type type);
|
||||
extern bool housekeeping_test_cpu(int cpu, enum hk_type type);
|
||||
extern int housekeeping_update(struct cpumask *isol_mask);
|
||||
extern void __init housekeeping_init(void);
|
||||
|
||||
#else
|
||||
|
|
@ -58,6 +67,7 @@ static inline bool housekeeping_test_cpu(int cpu, enum hk_type type)
|
|||
return true;
|
||||
}
|
||||
|
||||
static inline int housekeeping_update(struct cpumask *isol_mask) { return 0; }
|
||||
static inline void housekeeping_init(void) { }
|
||||
#endif /* CONFIG_CPU_ISOLATION */
|
||||
|
||||
|
|
@ -72,9 +82,7 @@ static inline bool housekeeping_cpu(int cpu, enum hk_type type)
|
|||
|
||||
static inline bool cpu_is_isolated(int cpu)
|
||||
{
|
||||
return !housekeeping_test_cpu(cpu, HK_TYPE_DOMAIN) ||
|
||||
!housekeeping_test_cpu(cpu, HK_TYPE_TICK) ||
|
||||
cpuset_cpu_is_isolated(cpu);
|
||||
return !housekeeping_test_cpu(cpu, HK_TYPE_DOMAIN);
|
||||
}
|
||||
|
||||
#endif /* _LINUX_SCHED_ISOLATION_H */
|
||||
|
|
|
|||
|
|
@ -303,6 +303,7 @@ int calculate_pressure_threshold(struct zone *zone);
|
|||
int calculate_normal_threshold(struct zone *zone);
|
||||
void set_pgdat_percpu_threshold(pg_data_t *pgdat,
|
||||
int (*calculate_pressure)(struct zone *));
|
||||
void vmstat_flush_workqueue(void);
|
||||
#else /* CONFIG_SMP */
|
||||
|
||||
/*
|
||||
|
|
@ -403,6 +404,7 @@ static inline void __dec_node_page_state(struct page *page,
|
|||
static inline void refresh_zone_stat_thresholds(void) { }
|
||||
static inline void cpu_vm_stats_fold(int cpu) { }
|
||||
static inline void quiet_vmstat(void) { }
|
||||
static inline void vmstat_flush_workqueue(void) { }
|
||||
|
||||
static inline void drain_zonestat(struct zone *zone,
|
||||
struct per_cpu_zonestat *pzstats) { }
|
||||
|
|
|
|||
|
|
@ -588,7 +588,7 @@ struct workqueue_attrs *alloc_workqueue_attrs_noprof(void);
|
|||
void free_workqueue_attrs(struct workqueue_attrs *attrs);
|
||||
int apply_workqueue_attrs(struct workqueue_struct *wq,
|
||||
const struct workqueue_attrs *attrs);
|
||||
extern int workqueue_unbound_exclude_cpumask(cpumask_var_t cpumask);
|
||||
extern int workqueue_unbound_housekeeping_update(const struct cpumask *hk);
|
||||
|
||||
extern bool queue_work_on(int cpu, struct workqueue_struct *wq,
|
||||
struct work_struct *work);
|
||||
|
|
|
|||
|
|
@ -1257,6 +1257,7 @@ config CPUSETS
|
|||
bool "Cpuset controller"
|
||||
depends on SMP
|
||||
select UNION_FIND
|
||||
select CPU_ISOLATION
|
||||
help
|
||||
This option will let you create and manage CPUSETs which
|
||||
allow dynamically partitioning a system into sets of CPUs and
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@
|
|||
#include <linux/mempolicy.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/memory.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/deadline.h>
|
||||
|
|
@ -85,12 +84,6 @@ static cpumask_var_t isolated_cpus;
|
|||
*/
|
||||
static bool isolated_cpus_updating;
|
||||
|
||||
/*
|
||||
* Housekeeping (HK_TYPE_DOMAIN) CPUs at boot
|
||||
*/
|
||||
static cpumask_var_t boot_hk_cpus;
|
||||
static bool have_boot_isolcpus;
|
||||
|
||||
/*
|
||||
* A flag to force sched domain rebuild at the end of an operation.
|
||||
* It can be set in
|
||||
|
|
@ -286,6 +279,13 @@ void cpuset_full_unlock(void)
|
|||
cpus_read_unlock();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
bool lockdep_is_cpuset_held(void)
|
||||
{
|
||||
return lockdep_is_held(&cpuset_mutex);
|
||||
}
|
||||
#endif
|
||||
|
||||
static DEFINE_SPINLOCK(callback_lock);
|
||||
|
||||
void cpuset_callback_lock_irq(void)
|
||||
|
|
@ -1205,11 +1205,10 @@ void cpuset_update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus)
|
|||
|
||||
if (top_cs) {
|
||||
/*
|
||||
* PF_KTHREAD tasks are handled by housekeeping.
|
||||
* PF_NO_SETAFFINITY tasks are ignored.
|
||||
* All per cpu kthreads should have PF_NO_SETAFFINITY
|
||||
* flag set, see kthread_set_per_cpu().
|
||||
*/
|
||||
if (task->flags & PF_NO_SETAFFINITY)
|
||||
if (task->flags & (PF_KTHREAD | PF_NO_SETAFFINITY))
|
||||
continue;
|
||||
cpumask_andnot(new_cpus, possible_mask, subpartitions_cpus);
|
||||
} else {
|
||||
|
|
@ -1450,15 +1449,16 @@ static bool isolated_cpus_can_update(struct cpumask *add_cpus,
|
|||
* @new_cpus: cpu mask
|
||||
* Return: true if there is conflict, false otherwise
|
||||
*
|
||||
* CPUs outside of boot_hk_cpus, if defined, can only be used in an
|
||||
* CPUs outside of HK_TYPE_DOMAIN_BOOT, if defined, can only be used in an
|
||||
* isolated partition.
|
||||
*/
|
||||
static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus)
|
||||
{
|
||||
if (!have_boot_isolcpus)
|
||||
if (!housekeeping_enabled(HK_TYPE_DOMAIN_BOOT))
|
||||
return false;
|
||||
|
||||
if ((prstate != PRS_ISOLATED) && !cpumask_subset(new_cpus, boot_hk_cpus))
|
||||
if ((prstate != PRS_ISOLATED) &&
|
||||
!cpumask_subset(new_cpus, housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT)))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
|
@ -1477,28 +1477,12 @@ static void update_isolation_cpumasks(void)
|
|||
if (!isolated_cpus_updating)
|
||||
return;
|
||||
|
||||
lockdep_assert_cpus_held();
|
||||
|
||||
ret = workqueue_unbound_exclude_cpumask(isolated_cpus);
|
||||
WARN_ON_ONCE(ret < 0);
|
||||
|
||||
ret = tmigr_isolated_exclude_cpumask(isolated_cpus);
|
||||
ret = housekeeping_update(isolated_cpus);
|
||||
WARN_ON_ONCE(ret < 0);
|
||||
|
||||
isolated_cpus_updating = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* cpuset_cpu_is_isolated - Check if the given CPU is isolated
|
||||
* @cpu: the CPU number to be checked
|
||||
* Return: true if CPU is used in an isolated partition, false otherwise
|
||||
*/
|
||||
bool cpuset_cpu_is_isolated(int cpu)
|
||||
{
|
||||
return cpumask_test_cpu(cpu, isolated_cpus);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cpuset_cpu_is_isolated);
|
||||
|
||||
/**
|
||||
* rm_siblings_excl_cpus - Remove exclusive CPUs that are used by sibling cpusets
|
||||
* @parent: Parent cpuset containing all siblings
|
||||
|
|
@ -3896,12 +3880,9 @@ int __init cpuset_init(void)
|
|||
|
||||
BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL));
|
||||
|
||||
have_boot_isolcpus = housekeeping_enabled(HK_TYPE_DOMAIN);
|
||||
if (have_boot_isolcpus) {
|
||||
BUG_ON(!alloc_cpumask_var(&boot_hk_cpus, GFP_KERNEL));
|
||||
cpumask_copy(boot_hk_cpus, housekeeping_cpumask(HK_TYPE_DOMAIN));
|
||||
cpumask_andnot(isolated_cpus, cpu_possible_mask, boot_hk_cpus);
|
||||
}
|
||||
if (housekeeping_enabled(HK_TYPE_DOMAIN_BOOT))
|
||||
cpumask_andnot(isolated_cpus, cpu_possible_mask,
|
||||
housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
42
kernel/cpu.c
42
kernel/cpu.c
|
|
@ -534,6 +534,11 @@ int lockdep_is_cpus_held(void)
|
|||
{
|
||||
return percpu_rwsem_is_held(&cpu_hotplug_lock);
|
||||
}
|
||||
|
||||
int lockdep_is_cpus_write_held(void)
|
||||
{
|
||||
return percpu_rwsem_is_write_held(&cpu_hotplug_lock);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void lockdep_acquire_cpus_lock(void)
|
||||
|
|
@ -1410,6 +1415,16 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
|
|||
|
||||
cpus_write_lock();
|
||||
|
||||
/*
|
||||
* Keep at least one housekeeping cpu onlined to avoid generating
|
||||
* an empty sched_domain span.
|
||||
*/
|
||||
if (cpumask_any_and(cpu_online_mask,
|
||||
housekeeping_cpumask(HK_TYPE_DOMAIN)) >= nr_cpu_ids) {
|
||||
ret = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
cpuhp_tasks_frozen = tasks_frozen;
|
||||
|
||||
prev_state = cpuhp_set_state(cpu, st, target);
|
||||
|
|
@ -1456,22 +1471,8 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
struct cpu_down_work {
|
||||
unsigned int cpu;
|
||||
enum cpuhp_state target;
|
||||
};
|
||||
|
||||
static long __cpu_down_maps_locked(void *arg)
|
||||
{
|
||||
struct cpu_down_work *work = arg;
|
||||
|
||||
return _cpu_down(work->cpu, 0, work->target);
|
||||
}
|
||||
|
||||
static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
|
||||
{
|
||||
struct cpu_down_work work = { .cpu = cpu, .target = target, };
|
||||
|
||||
/*
|
||||
* If the platform does not support hotplug, report it explicitly to
|
||||
* differentiate it from a transient offlining failure.
|
||||
|
|
@ -1480,18 +1481,7 @@ static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
|
|||
return -EOPNOTSUPP;
|
||||
if (cpu_hotplug_disabled)
|
||||
return -EBUSY;
|
||||
|
||||
/*
|
||||
* Ensure that the control task does not run on the to be offlined
|
||||
* CPU to prevent a deadlock against cfs_b->period_timer.
|
||||
* Also keep at least one housekeeping cpu onlined to avoid generating
|
||||
* an empty sched_domain span.
|
||||
*/
|
||||
for_each_cpu_and(cpu, cpu_online_mask, housekeeping_cpumask(HK_TYPE_DOMAIN)) {
|
||||
if (cpu != work.cpu)
|
||||
return work_on_cpu(cpu, __cpu_down_maps_locked, &work);
|
||||
}
|
||||
return -EBUSY;
|
||||
return _cpu_down(cpu, 0, target);
|
||||
}
|
||||
|
||||
static int cpu_down(unsigned int cpu, enum cpuhp_state target)
|
||||
|
|
|
|||
190
kernel/kthread.c
190
kernel/kthread.c
|
|
@ -35,8 +35,8 @@ static DEFINE_SPINLOCK(kthread_create_lock);
|
|||
static LIST_HEAD(kthread_create_list);
|
||||
struct task_struct *kthreadd_task;
|
||||
|
||||
static LIST_HEAD(kthreads_hotplug);
|
||||
static DEFINE_MUTEX(kthreads_hotplug_lock);
|
||||
static LIST_HEAD(kthread_affinity_list);
|
||||
static DEFINE_MUTEX(kthread_affinity_lock);
|
||||
|
||||
struct kthread_create_info
|
||||
{
|
||||
|
|
@ -69,7 +69,7 @@ struct kthread {
|
|||
/* To store the full name if task comm is truncated. */
|
||||
char *full_name;
|
||||
struct task_struct *task;
|
||||
struct list_head hotplug_node;
|
||||
struct list_head affinity_node;
|
||||
struct cpumask *preferred_affinity;
|
||||
};
|
||||
|
||||
|
|
@ -128,7 +128,7 @@ bool set_kthread_struct(struct task_struct *p)
|
|||
|
||||
init_completion(&kthread->exited);
|
||||
init_completion(&kthread->parked);
|
||||
INIT_LIST_HEAD(&kthread->hotplug_node);
|
||||
INIT_LIST_HEAD(&kthread->affinity_node);
|
||||
p->vfork_done = &kthread->exited;
|
||||
|
||||
kthread->task = p;
|
||||
|
|
@ -323,10 +323,10 @@ void __noreturn kthread_exit(long result)
|
|||
{
|
||||
struct kthread *kthread = to_kthread(current);
|
||||
kthread->result = result;
|
||||
if (!list_empty(&kthread->hotplug_node)) {
|
||||
mutex_lock(&kthreads_hotplug_lock);
|
||||
list_del(&kthread->hotplug_node);
|
||||
mutex_unlock(&kthreads_hotplug_lock);
|
||||
if (!list_empty(&kthread->affinity_node)) {
|
||||
mutex_lock(&kthread_affinity_lock);
|
||||
list_del(&kthread->affinity_node);
|
||||
mutex_unlock(&kthread_affinity_lock);
|
||||
|
||||
if (kthread->preferred_affinity) {
|
||||
kfree(kthread->preferred_affinity);
|
||||
|
|
@ -362,17 +362,20 @@ static void kthread_fetch_affinity(struct kthread *kthread, struct cpumask *cpum
|
|||
{
|
||||
const struct cpumask *pref;
|
||||
|
||||
guard(rcu)();
|
||||
|
||||
if (kthread->preferred_affinity) {
|
||||
pref = kthread->preferred_affinity;
|
||||
} else {
|
||||
if (WARN_ON_ONCE(kthread->node == NUMA_NO_NODE))
|
||||
return;
|
||||
pref = cpumask_of_node(kthread->node);
|
||||
if (kthread->node == NUMA_NO_NODE)
|
||||
pref = housekeeping_cpumask(HK_TYPE_DOMAIN);
|
||||
else
|
||||
pref = cpumask_of_node(kthread->node);
|
||||
}
|
||||
|
||||
cpumask_and(cpumask, pref, housekeeping_cpumask(HK_TYPE_KTHREAD));
|
||||
cpumask_and(cpumask, pref, housekeeping_cpumask(HK_TYPE_DOMAIN));
|
||||
if (cpumask_empty(cpumask))
|
||||
cpumask_copy(cpumask, housekeeping_cpumask(HK_TYPE_KTHREAD));
|
||||
cpumask_copy(cpumask, housekeeping_cpumask(HK_TYPE_DOMAIN));
|
||||
}
|
||||
|
||||
static void kthread_affine_node(void)
|
||||
|
|
@ -380,32 +383,29 @@ static void kthread_affine_node(void)
|
|||
struct kthread *kthread = to_kthread(current);
|
||||
cpumask_var_t affinity;
|
||||
|
||||
WARN_ON_ONCE(kthread_is_per_cpu(current));
|
||||
if (WARN_ON_ONCE(kthread_is_per_cpu(current)))
|
||||
return;
|
||||
|
||||
if (kthread->node == NUMA_NO_NODE) {
|
||||
housekeeping_affine(current, HK_TYPE_KTHREAD);
|
||||
} else {
|
||||
if (!zalloc_cpumask_var(&affinity, GFP_KERNEL)) {
|
||||
WARN_ON_ONCE(1);
|
||||
return;
|
||||
}
|
||||
|
||||
mutex_lock(&kthreads_hotplug_lock);
|
||||
WARN_ON_ONCE(!list_empty(&kthread->hotplug_node));
|
||||
list_add_tail(&kthread->hotplug_node, &kthreads_hotplug);
|
||||
/*
|
||||
* The node cpumask is racy when read from kthread() but:
|
||||
* - a racing CPU going down will either fail on the subsequent
|
||||
* call to set_cpus_allowed_ptr() or be migrated to housekeepers
|
||||
* afterwards by the scheduler.
|
||||
* - a racing CPU going up will be handled by kthreads_online_cpu()
|
||||
*/
|
||||
kthread_fetch_affinity(kthread, affinity);
|
||||
set_cpus_allowed_ptr(current, affinity);
|
||||
mutex_unlock(&kthreads_hotplug_lock);
|
||||
|
||||
free_cpumask_var(affinity);
|
||||
if (!zalloc_cpumask_var(&affinity, GFP_KERNEL)) {
|
||||
WARN_ON_ONCE(1);
|
||||
return;
|
||||
}
|
||||
|
||||
mutex_lock(&kthread_affinity_lock);
|
||||
WARN_ON_ONCE(!list_empty(&kthread->affinity_node));
|
||||
list_add_tail(&kthread->affinity_node, &kthread_affinity_list);
|
||||
/*
|
||||
* The node cpumask is racy when read from kthread() but:
|
||||
* - a racing CPU going down will either fail on the subsequent
|
||||
* call to set_cpus_allowed_ptr() or be migrated to housekeepers
|
||||
* afterwards by the scheduler.
|
||||
* - a racing CPU going up will be handled by kthreads_online_cpu()
|
||||
*/
|
||||
kthread_fetch_affinity(kthread, affinity);
|
||||
set_cpus_allowed_ptr(current, affinity);
|
||||
mutex_unlock(&kthread_affinity_lock);
|
||||
|
||||
free_cpumask_var(affinity);
|
||||
}
|
||||
|
||||
static int kthread(void *_create)
|
||||
|
|
@ -453,6 +453,10 @@ static int kthread(void *_create)
|
|||
|
||||
self->started = 1;
|
||||
|
||||
/*
|
||||
* Apply default node affinity if no call to kthread_bind[_mask]() nor
|
||||
* kthread_affine_preferred() was issued before the first wake-up.
|
||||
*/
|
||||
if (!(current->flags & PF_NO_SETAFFINITY) && !self->preferred_affinity)
|
||||
kthread_affine_node();
|
||||
|
||||
|
|
@ -820,12 +824,13 @@ int kthreadd(void *unused)
|
|||
/* Setup a clean context for our children to inherit. */
|
||||
set_task_comm(tsk, comm);
|
||||
ignore_signals(tsk);
|
||||
set_cpus_allowed_ptr(tsk, housekeeping_cpumask(HK_TYPE_KTHREAD));
|
||||
set_mems_allowed(node_states[N_MEMORY]);
|
||||
|
||||
current->flags |= PF_NOFREEZE;
|
||||
cgroup_init_kthreadd();
|
||||
|
||||
kthread_affine_node();
|
||||
|
||||
for (;;) {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
if (list_empty(&kthread_create_list))
|
||||
|
|
@ -851,6 +856,18 @@ int kthreadd(void *unused)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* kthread_affine_preferred - Define a kthread's preferred affinity
|
||||
* @p: thread created by kthread_create().
|
||||
* @mask: preferred mask of CPUs (might not be online, must be possible) for @p
|
||||
* to run on.
|
||||
*
|
||||
* Similar to kthread_bind_mask() except that the affinity is not a requirement
|
||||
* but rather a preference that can be constrained by CPU isolation or CPU hotplug.
|
||||
* Must be called before the first wakeup of the kthread.
|
||||
*
|
||||
* Returns 0 if the affinity has been applied.
|
||||
*/
|
||||
int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask)
|
||||
{
|
||||
struct kthread *kthread = to_kthread(p);
|
||||
|
|
@ -873,16 +890,16 @@ int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask)
|
|||
goto out;
|
||||
}
|
||||
|
||||
mutex_lock(&kthreads_hotplug_lock);
|
||||
mutex_lock(&kthread_affinity_lock);
|
||||
cpumask_copy(kthread->preferred_affinity, mask);
|
||||
WARN_ON_ONCE(!list_empty(&kthread->hotplug_node));
|
||||
list_add_tail(&kthread->hotplug_node, &kthreads_hotplug);
|
||||
WARN_ON_ONCE(!list_empty(&kthread->affinity_node));
|
||||
list_add_tail(&kthread->affinity_node, &kthread_affinity_list);
|
||||
kthread_fetch_affinity(kthread, affinity);
|
||||
|
||||
scoped_guard (raw_spinlock_irqsave, &p->pi_lock)
|
||||
set_cpus_allowed_force(p, affinity);
|
||||
|
||||
mutex_unlock(&kthreads_hotplug_lock);
|
||||
mutex_unlock(&kthread_affinity_lock);
|
||||
out:
|
||||
free_cpumask_var(affinity);
|
||||
|
||||
|
|
@ -890,6 +907,67 @@ out:
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(kthread_affine_preferred);
|
||||
|
||||
static int kthreads_update_affinity(bool force)
|
||||
{
|
||||
cpumask_var_t affinity;
|
||||
struct kthread *k;
|
||||
int ret;
|
||||
|
||||
guard(mutex)(&kthread_affinity_lock);
|
||||
|
||||
if (list_empty(&kthread_affinity_list))
|
||||
return 0;
|
||||
|
||||
if (!zalloc_cpumask_var(&affinity, GFP_KERNEL))
|
||||
return -ENOMEM;
|
||||
|
||||
ret = 0;
|
||||
|
||||
list_for_each_entry(k, &kthread_affinity_list, affinity_node) {
|
||||
if (WARN_ON_ONCE((k->task->flags & PF_NO_SETAFFINITY) ||
|
||||
kthread_is_per_cpu(k->task))) {
|
||||
ret = -EINVAL;
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unbound kthreads without preferred affinity are already affine
|
||||
* to housekeeping, whether those CPUs are online or not. So no need
|
||||
* to handle newly online CPUs for them. However housekeeping changes
|
||||
* have to be applied.
|
||||
*
|
||||
* But kthreads with a preferred affinity or node are different:
|
||||
* if none of their preferred CPUs are online and part of
|
||||
* housekeeping at the same time, they must be affine to housekeeping.
|
||||
* But as soon as one of their preferred CPU becomes online, they must
|
||||
* be affine to them.
|
||||
*/
|
||||
if (force || k->preferred_affinity || k->node != NUMA_NO_NODE) {
|
||||
kthread_fetch_affinity(k, affinity);
|
||||
set_cpus_allowed_ptr(k->task, affinity);
|
||||
}
|
||||
}
|
||||
|
||||
free_cpumask_var(affinity);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* kthreads_update_housekeeping - Update kthreads affinity on cpuset change
|
||||
*
|
||||
* When cpuset changes a partition type to/from "isolated" or updates related
|
||||
* cpumasks, propagate the housekeeping cpumask change to preferred kthreads
|
||||
* affinity.
|
||||
*
|
||||
* Returns 0 if successful, -ENOMEM if temporary mask couldn't
|
||||
* be allocated or -EINVAL in case of internal error.
|
||||
*/
|
||||
int kthreads_update_housekeeping(void)
|
||||
{
|
||||
return kthreads_update_affinity(true);
|
||||
}
|
||||
|
||||
/*
|
||||
* Re-affine kthreads according to their preferences
|
||||
* and the newly online CPU. The CPU down part is handled
|
||||
|
|
@ -899,33 +977,7 @@ EXPORT_SYMBOL_GPL(kthread_affine_preferred);
|
|||
*/
|
||||
static int kthreads_online_cpu(unsigned int cpu)
|
||||
{
|
||||
cpumask_var_t affinity;
|
||||
struct kthread *k;
|
||||
int ret;
|
||||
|
||||
guard(mutex)(&kthreads_hotplug_lock);
|
||||
|
||||
if (list_empty(&kthreads_hotplug))
|
||||
return 0;
|
||||
|
||||
if (!zalloc_cpumask_var(&affinity, GFP_KERNEL))
|
||||
return -ENOMEM;
|
||||
|
||||
ret = 0;
|
||||
|
||||
list_for_each_entry(k, &kthreads_hotplug, hotplug_node) {
|
||||
if (WARN_ON_ONCE((k->task->flags & PF_NO_SETAFFINITY) ||
|
||||
kthread_is_per_cpu(k->task))) {
|
||||
ret = -EINVAL;
|
||||
continue;
|
||||
}
|
||||
kthread_fetch_affinity(k, affinity);
|
||||
set_cpus_allowed_ptr(k->task, affinity);
|
||||
}
|
||||
|
||||
free_cpumask_var(affinity);
|
||||
|
||||
return ret;
|
||||
return kthreads_update_affinity(false);
|
||||
}
|
||||
|
||||
static int kthreads_init(void)
|
||||
|
|
|
|||
|
|
@ -8,9 +8,11 @@
|
|||
*
|
||||
*/
|
||||
#include <linux/sched/isolation.h>
|
||||
#include <linux/pci.h>
|
||||
#include "sched.h"
|
||||
|
||||
enum hk_flags {
|
||||
HK_FLAG_DOMAIN_BOOT = BIT(HK_TYPE_DOMAIN_BOOT),
|
||||
HK_FLAG_DOMAIN = BIT(HK_TYPE_DOMAIN),
|
||||
HK_FLAG_MANAGED_IRQ = BIT(HK_TYPE_MANAGED_IRQ),
|
||||
HK_FLAG_KERNEL_NOISE = BIT(HK_TYPE_KERNEL_NOISE),
|
||||
|
|
@ -20,7 +22,7 @@ DEFINE_STATIC_KEY_FALSE(housekeeping_overridden);
|
|||
EXPORT_SYMBOL_GPL(housekeeping_overridden);
|
||||
|
||||
struct housekeeping {
|
||||
cpumask_var_t cpumasks[HK_TYPE_MAX];
|
||||
struct cpumask __rcu *cpumasks[HK_TYPE_MAX];
|
||||
unsigned long flags;
|
||||
};
|
||||
|
||||
|
|
@ -28,21 +30,62 @@ static struct housekeeping housekeeping;
|
|||
|
||||
bool housekeeping_enabled(enum hk_type type)
|
||||
{
|
||||
return !!(housekeeping.flags & BIT(type));
|
||||
return !!(READ_ONCE(housekeeping.flags) & BIT(type));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(housekeeping_enabled);
|
||||
|
||||
static bool housekeeping_dereference_check(enum hk_type type)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_LOCKDEP) && type == HK_TYPE_DOMAIN) {
|
||||
/* Cpuset isn't even writable yet? */
|
||||
if (system_state <= SYSTEM_SCHEDULING)
|
||||
return true;
|
||||
|
||||
/* CPU hotplug write locked, so cpuset partition can't be overwritten */
|
||||
if (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_write_held())
|
||||
return true;
|
||||
|
||||
/* Cpuset lock held, partitions not writable */
|
||||
if (IS_ENABLED(CONFIG_CPUSETS) && lockdep_is_cpuset_held())
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline struct cpumask *housekeeping_cpumask_dereference(enum hk_type type)
|
||||
{
|
||||
return rcu_dereference_all_check(housekeeping.cpumasks[type],
|
||||
housekeeping_dereference_check(type));
|
||||
}
|
||||
|
||||
const struct cpumask *housekeeping_cpumask(enum hk_type type)
|
||||
{
|
||||
const struct cpumask *mask = NULL;
|
||||
|
||||
if (static_branch_unlikely(&housekeeping_overridden)) {
|
||||
if (READ_ONCE(housekeeping.flags) & BIT(type))
|
||||
mask = housekeeping_cpumask_dereference(type);
|
||||
}
|
||||
if (!mask)
|
||||
mask = cpu_possible_mask;
|
||||
return mask;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(housekeeping_cpumask);
|
||||
|
||||
int housekeeping_any_cpu(enum hk_type type)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
if (static_branch_unlikely(&housekeeping_overridden)) {
|
||||
if (housekeeping.flags & BIT(type)) {
|
||||
cpu = sched_numa_find_closest(housekeeping.cpumasks[type], smp_processor_id());
|
||||
cpu = sched_numa_find_closest(housekeeping_cpumask(type), smp_processor_id());
|
||||
if (cpu < nr_cpu_ids)
|
||||
return cpu;
|
||||
|
||||
cpu = cpumask_any_and_distribute(housekeeping.cpumasks[type], cpu_online_mask);
|
||||
cpu = cpumask_any_and_distribute(housekeeping_cpumask(type), cpu_online_mask);
|
||||
if (likely(cpu < nr_cpu_ids))
|
||||
return cpu;
|
||||
/*
|
||||
|
|
@ -58,32 +101,69 @@ int housekeeping_any_cpu(enum hk_type type)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(housekeeping_any_cpu);
|
||||
|
||||
const struct cpumask *housekeeping_cpumask(enum hk_type type)
|
||||
{
|
||||
if (static_branch_unlikely(&housekeeping_overridden))
|
||||
if (housekeeping.flags & BIT(type))
|
||||
return housekeeping.cpumasks[type];
|
||||
return cpu_possible_mask;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(housekeeping_cpumask);
|
||||
|
||||
void housekeeping_affine(struct task_struct *t, enum hk_type type)
|
||||
{
|
||||
if (static_branch_unlikely(&housekeeping_overridden))
|
||||
if (housekeeping.flags & BIT(type))
|
||||
set_cpus_allowed_ptr(t, housekeeping.cpumasks[type]);
|
||||
set_cpus_allowed_ptr(t, housekeeping_cpumask(type));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(housekeeping_affine);
|
||||
|
||||
bool housekeeping_test_cpu(int cpu, enum hk_type type)
|
||||
{
|
||||
if (static_branch_unlikely(&housekeeping_overridden))
|
||||
if (housekeeping.flags & BIT(type))
|
||||
return cpumask_test_cpu(cpu, housekeeping.cpumasks[type]);
|
||||
if (static_branch_unlikely(&housekeeping_overridden) &&
|
||||
READ_ONCE(housekeeping.flags) & BIT(type))
|
||||
return cpumask_test_cpu(cpu, housekeeping_cpumask(type));
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(housekeeping_test_cpu);
|
||||
|
||||
int housekeeping_update(struct cpumask *isol_mask)
|
||||
{
|
||||
struct cpumask *trial, *old = NULL;
|
||||
int err;
|
||||
|
||||
lockdep_assert_cpus_held();
|
||||
|
||||
trial = kmalloc(cpumask_size(), GFP_KERNEL);
|
||||
if (!trial)
|
||||
return -ENOMEM;
|
||||
|
||||
cpumask_andnot(trial, housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT), isol_mask);
|
||||
if (!cpumask_intersects(trial, cpu_online_mask)) {
|
||||
kfree(trial);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!housekeeping.flags)
|
||||
static_branch_enable_cpuslocked(&housekeeping_overridden);
|
||||
|
||||
if (housekeeping.flags & HK_FLAG_DOMAIN)
|
||||
old = housekeeping_cpumask_dereference(HK_TYPE_DOMAIN);
|
||||
else
|
||||
WRITE_ONCE(housekeeping.flags, housekeeping.flags | HK_FLAG_DOMAIN);
|
||||
rcu_assign_pointer(housekeeping.cpumasks[HK_TYPE_DOMAIN], trial);
|
||||
|
||||
synchronize_rcu();
|
||||
|
||||
pci_probe_flush_workqueue();
|
||||
mem_cgroup_flush_workqueue();
|
||||
vmstat_flush_workqueue();
|
||||
|
||||
err = workqueue_unbound_housekeeping_update(housekeeping_cpumask(HK_TYPE_DOMAIN));
|
||||
WARN_ON_ONCE(err < 0);
|
||||
|
||||
err = tmigr_isolated_exclude_cpumask(isol_mask);
|
||||
WARN_ON_ONCE(err < 0);
|
||||
|
||||
err = kthreads_update_housekeeping();
|
||||
WARN_ON_ONCE(err < 0);
|
||||
|
||||
kfree(old);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __init housekeeping_init(void)
|
||||
{
|
||||
enum hk_type type;
|
||||
|
|
@ -95,20 +175,33 @@ void __init housekeeping_init(void)
|
|||
|
||||
if (housekeeping.flags & HK_FLAG_KERNEL_NOISE)
|
||||
sched_tick_offload_init();
|
||||
|
||||
/*
|
||||
* Realloc with a proper allocator so that any cpumask update
|
||||
* can indifferently free the old version with kfree().
|
||||
*/
|
||||
for_each_set_bit(type, &housekeeping.flags, HK_TYPE_MAX) {
|
||||
struct cpumask *omask, *nmask = kmalloc(cpumask_size(), GFP_KERNEL);
|
||||
|
||||
if (WARN_ON_ONCE(!nmask))
|
||||
return;
|
||||
|
||||
omask = rcu_dereference(housekeeping.cpumasks[type]);
|
||||
|
||||
/* We need at least one CPU to handle housekeeping work */
|
||||
WARN_ON_ONCE(cpumask_empty(housekeeping.cpumasks[type]));
|
||||
WARN_ON_ONCE(cpumask_empty(omask));
|
||||
cpumask_copy(nmask, omask);
|
||||
RCU_INIT_POINTER(housekeeping.cpumasks[type], nmask);
|
||||
memblock_free(omask, cpumask_size());
|
||||
}
|
||||
}
|
||||
|
||||
static void __init housekeeping_setup_type(enum hk_type type,
|
||||
cpumask_var_t housekeeping_staging)
|
||||
{
|
||||
struct cpumask *mask = memblock_alloc_or_panic(cpumask_size(), SMP_CACHE_BYTES);
|
||||
|
||||
alloc_bootmem_cpumask_var(&housekeeping.cpumasks[type]);
|
||||
cpumask_copy(housekeeping.cpumasks[type],
|
||||
housekeeping_staging);
|
||||
cpumask_copy(mask, housekeeping_staging);
|
||||
RCU_INIT_POINTER(housekeeping.cpumasks[type], mask);
|
||||
}
|
||||
|
||||
static int __init housekeeping_setup(char *str, unsigned long flags)
|
||||
|
|
@ -161,7 +254,7 @@ static int __init housekeeping_setup(char *str, unsigned long flags)
|
|||
|
||||
for_each_set_bit(type, &iter_flags, HK_TYPE_MAX) {
|
||||
if (!cpumask_equal(housekeeping_staging,
|
||||
housekeeping.cpumasks[type])) {
|
||||
housekeeping_cpumask(type))) {
|
||||
pr_warn("Housekeeping: nohz_full= must match isolcpus=\n");
|
||||
goto free_housekeeping_staging;
|
||||
}
|
||||
|
|
@ -182,7 +275,7 @@ static int __init housekeeping_setup(char *str, unsigned long flags)
|
|||
iter_flags = flags & (HK_FLAG_KERNEL_NOISE | HK_FLAG_DOMAIN);
|
||||
first_cpu = (type == HK_TYPE_MAX || !iter_flags) ? 0 :
|
||||
cpumask_first_and_and(cpu_present_mask,
|
||||
housekeeping_staging, housekeeping.cpumasks[type]);
|
||||
housekeeping_staging, housekeeping_cpumask(type));
|
||||
if (first_cpu >= min(nr_cpu_ids, setup_max_cpus)) {
|
||||
pr_warn("Housekeeping: must include one present CPU "
|
||||
"neither in nohz_full= nor in isolcpus=domain, "
|
||||
|
|
@ -239,7 +332,7 @@ static int __init housekeeping_isolcpus_setup(char *str)
|
|||
|
||||
if (!strncmp(str, "domain,", 7)) {
|
||||
str += 7;
|
||||
flags |= HK_FLAG_DOMAIN;
|
||||
flags |= HK_FLAG_DOMAIN | HK_FLAG_DOMAIN_BOOT;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -269,7 +362,7 @@ static int __init housekeeping_isolcpus_setup(char *str)
|
|||
|
||||
/* Default behaviour for isolcpus without flags */
|
||||
if (!flags)
|
||||
flags |= HK_FLAG_DOMAIN;
|
||||
flags |= HK_FLAG_DOMAIN | HK_FLAG_DOMAIN_BOOT;
|
||||
|
||||
return housekeeping_setup(str, flags);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@
|
|||
#include <linux/context_tracking.h>
|
||||
#include <linux/cpufreq.h>
|
||||
#include <linux/cpumask_api.h>
|
||||
#include <linux/cpuset.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/fs_api.h>
|
||||
|
|
@ -42,6 +43,8 @@
|
|||
#include <linux/ktime_api.h>
|
||||
#include <linux/lockdep_api.h>
|
||||
#include <linux/lockdep.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/memcontrol.h>
|
||||
#include <linux/minmax.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/module.h>
|
||||
|
|
@ -65,6 +68,7 @@
|
|||
#include <linux/types.h>
|
||||
#include <linux/u64_stats_sync_api.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/vmstat.h>
|
||||
#include <linux/wait_api.h>
|
||||
#include <linux/wait_bit.h>
|
||||
#include <linux/workqueue_api.h>
|
||||
|
|
|
|||
|
|
@ -466,9 +466,8 @@ static inline bool tmigr_is_isolated(int cpu)
|
|||
{
|
||||
if (!static_branch_unlikely(&tmigr_exclude_isolated))
|
||||
return false;
|
||||
return (!housekeeping_cpu(cpu, HK_TYPE_DOMAIN) ||
|
||||
cpuset_cpu_is_isolated(cpu)) &&
|
||||
housekeeping_cpu(cpu, HK_TYPE_KERNEL_NOISE);
|
||||
return (!housekeeping_cpu(cpu, HK_TYPE_DOMAIN) &&
|
||||
housekeeping_cpu(cpu, HK_TYPE_KERNEL_NOISE));
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -1497,7 +1496,7 @@ static int tmigr_clear_cpu_available(unsigned int cpu)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int tmigr_set_cpu_available(unsigned int cpu)
|
||||
static int __tmigr_set_cpu_available(unsigned int cpu)
|
||||
{
|
||||
struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
|
||||
|
||||
|
|
@ -1505,9 +1504,6 @@ static int tmigr_set_cpu_available(unsigned int cpu)
|
|||
if (WARN_ON_ONCE(!tmc->tmgroup))
|
||||
return -EINVAL;
|
||||
|
||||
if (tmigr_is_isolated(cpu))
|
||||
return 0;
|
||||
|
||||
guard(mutex)(&tmigr_available_mutex);
|
||||
|
||||
cpumask_set_cpu(cpu, tmigr_available_cpumask);
|
||||
|
|
@ -1523,6 +1519,14 @@ static int tmigr_set_cpu_available(unsigned int cpu)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int tmigr_set_cpu_available(unsigned int cpu)
|
||||
{
|
||||
if (tmigr_is_isolated(cpu))
|
||||
return 0;
|
||||
|
||||
return __tmigr_set_cpu_available(cpu);
|
||||
}
|
||||
|
||||
static void tmigr_cpu_isolate(struct work_struct *ignored)
|
||||
{
|
||||
tmigr_clear_cpu_available(smp_processor_id());
|
||||
|
|
@ -1530,7 +1534,12 @@ static void tmigr_cpu_isolate(struct work_struct *ignored)
|
|||
|
||||
static void tmigr_cpu_unisolate(struct work_struct *ignored)
|
||||
{
|
||||
tmigr_set_cpu_available(smp_processor_id());
|
||||
/*
|
||||
* Don't call tmigr_is_isolated() ->housekeeping_cpu() directly because
|
||||
* the cpuset mutex is correctly held by the workqueue caller but lockdep
|
||||
* doesn't know that.
|
||||
*/
|
||||
__tmigr_set_cpu_available(smp_processor_id());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -6959,13 +6959,16 @@ static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask)
|
|||
}
|
||||
|
||||
/**
|
||||
* workqueue_unbound_exclude_cpumask - Exclude given CPUs from unbound cpumask
|
||||
* @exclude_cpumask: the cpumask to be excluded from wq_unbound_cpumask
|
||||
* workqueue_unbound_housekeeping_update - Propagate housekeeping cpumask update
|
||||
* @hk: the new housekeeping cpumask
|
||||
*
|
||||
* This function can be called from cpuset code to provide a set of isolated
|
||||
* CPUs that should be excluded from wq_unbound_cpumask.
|
||||
* Update the unbound workqueue cpumask on top of the new housekeeping cpumask such
|
||||
* that the effective unbound affinity is the intersection of the new housekeeping
|
||||
* with the requested affinity set via nohz_full=/isolcpus= or sysfs.
|
||||
*
|
||||
* Return: 0 on success and -errno on failure.
|
||||
*/
|
||||
int workqueue_unbound_exclude_cpumask(cpumask_var_t exclude_cpumask)
|
||||
int workqueue_unbound_housekeeping_update(const struct cpumask *hk)
|
||||
{
|
||||
cpumask_var_t cpumask;
|
||||
int ret = 0;
|
||||
|
|
@ -6981,14 +6984,14 @@ int workqueue_unbound_exclude_cpumask(cpumask_var_t exclude_cpumask)
|
|||
* (HK_TYPE_WQ ∩ HK_TYPE_DOMAIN) house keeping mask and rewritten
|
||||
* by any subsequent write to workqueue/cpumask sysfs file.
|
||||
*/
|
||||
if (!cpumask_andnot(cpumask, wq_requested_unbound_cpumask, exclude_cpumask))
|
||||
if (!cpumask_and(cpumask, wq_requested_unbound_cpumask, hk))
|
||||
cpumask_copy(cpumask, wq_requested_unbound_cpumask);
|
||||
if (!cpumask_equal(cpumask, wq_unbound_cpumask))
|
||||
ret = workqueue_apply_unbound_cpumask(cpumask);
|
||||
|
||||
/* Save the current isolated cpumask & export it via sysfs */
|
||||
if (!ret)
|
||||
cpumask_copy(wq_isolated_cpumask, exclude_cpumask);
|
||||
cpumask_andnot(wq_isolated_cpumask, cpu_possible_mask, hk);
|
||||
|
||||
mutex_unlock(&wq_pool_mutex);
|
||||
free_cpumask_var(cpumask);
|
||||
|
|
|
|||
|
|
@ -96,6 +96,8 @@ static bool cgroup_memory_nokmem __ro_after_init;
|
|||
/* BPF memory accounting disabled? */
|
||||
static bool cgroup_memory_nobpf __ro_after_init;
|
||||
|
||||
static struct workqueue_struct *memcg_wq __ro_after_init;
|
||||
|
||||
static struct kmem_cache *memcg_cachep;
|
||||
static struct kmem_cache *memcg_pn_cachep;
|
||||
|
||||
|
|
@ -2003,6 +2005,19 @@ static bool is_memcg_drain_needed(struct memcg_stock_pcp *stock,
|
|||
return flush;
|
||||
}
|
||||
|
||||
static void schedule_drain_work(int cpu, struct work_struct *work)
|
||||
{
|
||||
/*
|
||||
* Protect housekeeping cpumask read and work enqueue together
|
||||
* in the same RCU critical section so that later cpuset isolated
|
||||
* partition update only need to wait for an RCU GP and flush the
|
||||
* pending work on newly isolated CPUs.
|
||||
*/
|
||||
guard(rcu)();
|
||||
if (!cpu_is_isolated(cpu))
|
||||
queue_work_on(cpu, memcg_wq, work);
|
||||
}
|
||||
|
||||
/*
|
||||
* Drains all per-CPU charge caches for given root_memcg resp. subtree
|
||||
* of the hierarchy under it.
|
||||
|
|
@ -2032,8 +2047,8 @@ void drain_all_stock(struct mem_cgroup *root_memcg)
|
|||
&memcg_st->flags)) {
|
||||
if (cpu == curcpu)
|
||||
drain_local_memcg_stock(&memcg_st->work);
|
||||
else if (!cpu_is_isolated(cpu))
|
||||
schedule_work_on(cpu, &memcg_st->work);
|
||||
else
|
||||
schedule_drain_work(cpu, &memcg_st->work);
|
||||
}
|
||||
|
||||
if (!test_bit(FLUSHING_CACHED_CHARGE, &obj_st->flags) &&
|
||||
|
|
@ -2042,8 +2057,8 @@ void drain_all_stock(struct mem_cgroup *root_memcg)
|
|||
&obj_st->flags)) {
|
||||
if (cpu == curcpu)
|
||||
drain_local_obj_stock(&obj_st->work);
|
||||
else if (!cpu_is_isolated(cpu))
|
||||
schedule_work_on(cpu, &obj_st->work);
|
||||
else
|
||||
schedule_drain_work(cpu, &obj_st->work);
|
||||
}
|
||||
}
|
||||
migrate_enable();
|
||||
|
|
@ -5112,6 +5127,11 @@ void mem_cgroup_sk_uncharge(const struct sock *sk, unsigned int nr_pages)
|
|||
refill_stock(memcg, nr_pages);
|
||||
}
|
||||
|
||||
void mem_cgroup_flush_workqueue(void)
|
||||
{
|
||||
flush_workqueue(memcg_wq);
|
||||
}
|
||||
|
||||
static int __init cgroup_memory(char *s)
|
||||
{
|
||||
char *token;
|
||||
|
|
@ -5154,6 +5174,9 @@ int __init mem_cgroup_init(void)
|
|||
cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL,
|
||||
memcg_hotplug_cpu_dead);
|
||||
|
||||
memcg_wq = alloc_workqueue("memcg", WQ_PERCPU, 0);
|
||||
WARN_ON(!memcg_wq);
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work,
|
||||
drain_local_memcg_stock);
|
||||
|
|
|
|||
15
mm/vmstat.c
15
mm/vmstat.c
|
|
@ -2124,6 +2124,11 @@ static void vmstat_shepherd(struct work_struct *w);
|
|||
|
||||
static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);
|
||||
|
||||
void vmstat_flush_workqueue(void)
|
||||
{
|
||||
flush_workqueue(mm_percpu_wq);
|
||||
}
|
||||
|
||||
static void vmstat_shepherd(struct work_struct *w)
|
||||
{
|
||||
int cpu;
|
||||
|
|
@ -2144,11 +2149,13 @@ static void vmstat_shepherd(struct work_struct *w)
|
|||
* infrastructure ever noticing. Skip regular flushing from vmstat_shepherd
|
||||
* for all isolated CPUs to avoid interference with the isolated workload.
|
||||
*/
|
||||
if (cpu_is_isolated(cpu))
|
||||
continue;
|
||||
scoped_guard(rcu) {
|
||||
if (cpu_is_isolated(cpu))
|
||||
continue;
|
||||
|
||||
if (!delayed_work_pending(dw) && need_update(cpu))
|
||||
queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
|
||||
if (!delayed_work_pending(dw) && need_update(cpu))
|
||||
queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
|
||||
}
|
||||
|
||||
cond_resched();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1022,7 +1022,7 @@ static int netdev_rx_queue_set_rps_mask(struct netdev_rx_queue *queue,
|
|||
int rps_cpumask_housekeeping(struct cpumask *mask)
|
||||
{
|
||||
if (!cpumask_empty(mask)) {
|
||||
cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_DOMAIN));
|
||||
cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT));
|
||||
cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_WQ));
|
||||
if (cpumask_empty(mask))
|
||||
return -EINVAL;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue