mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 05:04:51 +01:00
Miscellaneous x86 fixes:
- Fix SEV guest boot failures in certain circumstances, due
to very early code relying on a BSS-zeroed variable that
isn't actually zeroed yet and may contain non-zero bootup
values. Move the variable into the .data section to gain
even earlier zeroing.
- Expose & allow the IBPB-on-Entry feature on SNP guests,
which was not properly exposed to guests due to initial
implementational caution.
- Fix O= build failure when CONFIG_EFI_SBAT_FILE is using
relative file paths.
- 4 commits to fix the various SNC (Sub-NUMA Clustering)
topology enumeration bugs/artifacts (sched-domain build
errors mostly). SNC enumeration data got more complicated
with Granite Rapids X (GNR) and Clearwater Forest X (CWF),
which exposed these bugs and made their effects more
serious.
- Also use the now sane(r) SNC code to fix resctrl SNC
detection bugs.
- Work around a historic libgcc unwinder bug in the vdso32
sigreturn code (again), which regressed during an overly
aggressive recent cleanup of DWARF annotations.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-----BEGIN PGP SIGNATURE-----
iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmmsy9wRHG1pbmdvQGtl
cm5lbC5vcmcACgkQEnMQ0APhK1ieiQ/7B2Rfm5vR5rQLlAv26iEMypIwoCiCMgzA
YD3nOMFl6aGhphKryiU0b4MDhAIASN9X6mZloryUKyol1oKP0evkWXSk/0J+k+V9
lS7uIVL+8nPTSl3gQE7ARzJ9jakFN49VzDheZjsjIHC0+n+yvCJU6xSx8IKeiTSW
axpX8R33M3Fj+u5anF3m37OdFTgiYxFO0t5VNFgWP4H9367yC/wnHPuDyidAdJ/N
B7PL1L3rG3+w/4np81Xwi/rThwgsSWarVLNuMJuGM5wujMr8mQGhuWaeLiPgTx7G
wze1iarWvp5uqamGztpy/4WMD1x0yBX9CCSocnwF48Fh1yTww5+uwOZn5e5fZxYr
vDhCH6+DB8Rt3Wj+/3RBzHSFe7rNq+f86U84uxTwyOs5eC5sGUuyH15lCt4dP9ZO
uQfW0dQRwvUXCGXJxxZdIR0nq/vEJUmQ+DLLL6zkCj24t9ND5IPAkBLVn7P5PO5s
qv8dPpldSq57V4comqW8oDAqLL0OeS1qgggxlHzqAdrMmt+IVKWvteRXrkgy1m9Y
Bt0EbdghUTZkn9+FcUTorVA/pZHL5sYCiuGQxNbaaLmMWrcX4I3XnEtpzgukHh8e
BL1blJWAm/4cuhGXb4RF7AZMQgTU56greOU385Afc1Qz2lzohGO4lqgGOH8L0ZEh
KqEX1IS0ZbI=
=KlDX
-----END PGP SIGNATURE-----
Merge tag 'x86-urgent-2026-03-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar:
- Fix SEV guest boot failures in certain circumstances, due to
very early code relying on a BSS-zeroed variable that isn't
actually zeroed yet and may contain non-zero bootup values.
Move the variable into the .data section to gain even earlier
zeroing
- Expose & allow the IBPB-on-Entry feature on SNP guests, which
was not properly exposed to guests due to initial implementational
caution
- Fix O= build failure when CONFIG_EFI_SBAT_FILE is using relative
file paths
- Fix the various SNC (Sub-NUMA Clustering) topology enumeration
bugs/artifacts (sched-domain build errors mostly).
SNC enumeration data got more complicated with Granite Rapids X
(GNR) and Clearwater Forest X (CWF), which exposed these bugs
and made their effects more serious
- Also use the now sane(r) SNC code to fix resctrl SNC detection bugs
- Work around a historic libgcc unwinder bug in the vdso32 sigreturn
code (again), which regressed during an overly aggressive recent
cleanup of DWARF annotations
* tag 'x86-urgent-2026-03-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/entry/vdso32: Work around libgcc unwinder bug
x86/resctrl: Fix SNC detection
x86/topo: Fix SNC topology mess
x86/topo: Replace x86_has_numa_in_package
x86/topo: Add topology_num_nodes_per_package()
x86/numa: Store extra copy of numa_nodes_parsed
x86/boot: Handle relative CONFIG_EFI_SBAT_FILE file paths
x86/sev: Allow IBPB-on-Entry feature for SNP guests
x86/boot/sev: Move SEV decompressor variables into the .data section
This commit is contained in:
commit
c23719abc3
14 changed files with 227 additions and 94 deletions
|
|
@ -113,6 +113,7 @@ vmlinux-objs-$(CONFIG_EFI_SBAT) += $(obj)/sbat.o
|
|||
|
||||
ifdef CONFIG_EFI_SBAT
|
||||
$(obj)/sbat.o: $(CONFIG_EFI_SBAT_FILE)
|
||||
AFLAGS_sbat.o += -I $(srctree)
|
||||
endif
|
||||
|
||||
$(obj)/vmlinux: $(vmlinux-objs-y) $(vmlinux-libs-y) FORCE
|
||||
|
|
|
|||
|
|
@ -28,17 +28,17 @@
|
|||
#include "sev.h"
|
||||
|
||||
static struct ghcb boot_ghcb_page __aligned(PAGE_SIZE);
|
||||
struct ghcb *boot_ghcb;
|
||||
struct ghcb *boot_ghcb __section(".data");
|
||||
|
||||
#undef __init
|
||||
#define __init
|
||||
|
||||
#define __BOOT_COMPRESSED
|
||||
|
||||
u8 snp_vmpl;
|
||||
u16 ghcb_version;
|
||||
u8 snp_vmpl __section(".data");
|
||||
u16 ghcb_version __section(".data");
|
||||
|
||||
u64 boot_svsm_caa_pa;
|
||||
u64 boot_svsm_caa_pa __section(".data");
|
||||
|
||||
/* Include code for early handlers */
|
||||
#include "../../boot/startup/sev-shared.c"
|
||||
|
|
@ -188,6 +188,7 @@ bool sev_es_check_ghcb_fault(unsigned long address)
|
|||
MSR_AMD64_SNP_RESERVED_BIT13 | \
|
||||
MSR_AMD64_SNP_RESERVED_BIT15 | \
|
||||
MSR_AMD64_SNP_SECURE_AVIC | \
|
||||
MSR_AMD64_SNP_RESERVED_BITS19_22 | \
|
||||
MSR_AMD64_SNP_RESERVED_MASK)
|
||||
|
||||
#ifdef CONFIG_AMD_SECURE_AVIC
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ static u32 cpuid_std_range_max __ro_after_init;
|
|||
static u32 cpuid_hyp_range_max __ro_after_init;
|
||||
static u32 cpuid_ext_range_max __ro_after_init;
|
||||
|
||||
bool sev_snp_needs_sfw;
|
||||
bool sev_snp_needs_sfw __section(".data");
|
||||
|
||||
void __noreturn
|
||||
sev_es_terminate(unsigned int set, unsigned int reason)
|
||||
|
|
|
|||
|
|
@ -89,6 +89,7 @@ static const char * const sev_status_feat_names[] = {
|
|||
[MSR_AMD64_SNP_VMSA_REG_PROT_BIT] = "VMSARegProt",
|
||||
[MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt",
|
||||
[MSR_AMD64_SNP_SECURE_AVIC_BIT] = "SecureAVIC",
|
||||
[MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT] = "IBPBOnEntry",
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -35,9 +35,38 @@
|
|||
#endif
|
||||
.endm
|
||||
|
||||
/*
|
||||
* WARNING:
|
||||
*
|
||||
* A bug in the libgcc unwinder as of at least gcc 15.2 (2026) means that
|
||||
* the unwinder fails to recognize the signal frame flag.
|
||||
*
|
||||
* There is a hacky legacy fallback path in libgcc which ends up
|
||||
* getting invoked instead. It happens to work as long as BOTH of the
|
||||
* following conditions are true:
|
||||
*
|
||||
* 1. There is at least one byte before each of the sigreturn
|
||||
* functions which falls outside any function. This is enforced by
|
||||
* an explicit nop instruction before the ALIGN.
|
||||
* 2. The code sequences between the entry point up to and including
|
||||
* the int $0x80 below need to match EXACTLY. Do not change them
|
||||
* in any way. The exact byte sequences are:
|
||||
*
|
||||
* __kernel_sigreturn:
|
||||
* 0: 58 pop %eax
|
||||
* 1: b8 77 00 00 00 mov $0x77,%eax
|
||||
* 6: cd 80 int $0x80
|
||||
*
|
||||
* __kernel_rt_sigreturn:
|
||||
* 0: b8 ad 00 00 00 mov $0xad,%eax
|
||||
* 5: cd 80 int $0x80
|
||||
*
|
||||
* For details, see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=124050
|
||||
*/
|
||||
.text
|
||||
.globl __kernel_sigreturn
|
||||
.type __kernel_sigreturn,@function
|
||||
nop /* libgcc hack: see comment above */
|
||||
ALIGN
|
||||
__kernel_sigreturn:
|
||||
STARTPROC_SIGNAL_FRAME IA32_SIGFRAME_sigcontext
|
||||
|
|
@ -52,6 +81,7 @@ SYM_INNER_LABEL(vdso32_sigreturn_landing_pad, SYM_L_GLOBAL)
|
|||
|
||||
.globl __kernel_rt_sigreturn
|
||||
.type __kernel_rt_sigreturn,@function
|
||||
nop /* libgcc hack: see comment above */
|
||||
ALIGN
|
||||
__kernel_rt_sigreturn:
|
||||
STARTPROC_SIGNAL_FRAME IA32_RT_SIGFRAME_sigcontext
|
||||
|
|
|
|||
|
|
@ -740,7 +740,10 @@
|
|||
#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
|
||||
#define MSR_AMD64_SNP_SECURE_AVIC_BIT 18
|
||||
#define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT)
|
||||
#define MSR_AMD64_SNP_RESV_BIT 19
|
||||
#define MSR_AMD64_SNP_RESERVED_BITS19_22 GENMASK_ULL(22, 19)
|
||||
#define MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT 23
|
||||
#define MSR_AMD64_SNP_IBPB_ON_ENTRY BIT_ULL(MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT)
|
||||
#define MSR_AMD64_SNP_RESV_BIT 24
|
||||
#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
|
||||
#define MSR_AMD64_SAVIC_CONTROL 0xc0010138
|
||||
#define MSR_AMD64_SAVIC_EN_BIT 0
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ extern int numa_off;
|
|||
*/
|
||||
extern s16 __apicid_to_node[MAX_LOCAL_APIC];
|
||||
extern nodemask_t numa_nodes_parsed __initdata;
|
||||
extern nodemask_t numa_phys_nodes_parsed __initdata;
|
||||
|
||||
static inline void set_apicid_to_node(int apicid, s16 node)
|
||||
{
|
||||
|
|
@ -48,6 +49,7 @@ extern void __init init_cpu_to_node(void);
|
|||
extern void numa_add_cpu(unsigned int cpu);
|
||||
extern void numa_remove_cpu(unsigned int cpu);
|
||||
extern void init_gi_nodes(void);
|
||||
extern int num_phys_nodes(void);
|
||||
#else /* CONFIG_NUMA */
|
||||
static inline void numa_set_node(int cpu, int node) { }
|
||||
static inline void numa_clear_node(int cpu) { }
|
||||
|
|
@ -55,6 +57,10 @@ static inline void init_cpu_to_node(void) { }
|
|||
static inline void numa_add_cpu(unsigned int cpu) { }
|
||||
static inline void numa_remove_cpu(unsigned int cpu) { }
|
||||
static inline void init_gi_nodes(void) { }
|
||||
static inline int num_phys_nodes(void)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
#endif /* CONFIG_NUMA */
|
||||
|
||||
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
|
||||
|
|
|
|||
|
|
@ -155,6 +155,7 @@ extern unsigned int __max_logical_packages;
|
|||
extern unsigned int __max_threads_per_core;
|
||||
extern unsigned int __num_threads_per_package;
|
||||
extern unsigned int __num_cores_per_package;
|
||||
extern unsigned int __num_nodes_per_package;
|
||||
|
||||
const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c);
|
||||
enum x86_topology_cpu_type get_topology_cpu_type(struct cpuinfo_x86 *c);
|
||||
|
|
@ -179,6 +180,11 @@ static inline unsigned int topology_num_threads_per_package(void)
|
|||
return __num_threads_per_package;
|
||||
}
|
||||
|
||||
static inline unsigned int topology_num_nodes_per_package(void)
|
||||
{
|
||||
return __num_nodes_per_package;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level);
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -95,6 +95,9 @@ EXPORT_SYMBOL(__max_dies_per_package);
|
|||
unsigned int __max_logical_packages __ro_after_init = 1;
|
||||
EXPORT_SYMBOL(__max_logical_packages);
|
||||
|
||||
unsigned int __num_nodes_per_package __ro_after_init = 1;
|
||||
EXPORT_SYMBOL(__num_nodes_per_package);
|
||||
|
||||
unsigned int __num_cores_per_package __ro_after_init = 1;
|
||||
EXPORT_SYMBOL(__num_cores_per_package);
|
||||
|
||||
|
|
|
|||
|
|
@ -364,7 +364,7 @@ void arch_mon_domain_online(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
|
|||
msr_clear_bit(MSR_RMID_SNC_CONFIG, 0);
|
||||
}
|
||||
|
||||
/* CPU models that support MSR_RMID_SNC_CONFIG */
|
||||
/* CPU models that support SNC and MSR_RMID_SNC_CONFIG */
|
||||
static const struct x86_cpu_id snc_cpu_ids[] __initconst = {
|
||||
X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
|
||||
X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, 0),
|
||||
|
|
@ -375,40 +375,14 @@ static const struct x86_cpu_id snc_cpu_ids[] __initconst = {
|
|||
{}
|
||||
};
|
||||
|
||||
/*
|
||||
* There isn't a simple hardware bit that indicates whether a CPU is running
|
||||
* in Sub-NUMA Cluster (SNC) mode. Infer the state by comparing the
|
||||
* number of CPUs sharing the L3 cache with CPU0 to the number of CPUs in
|
||||
* the same NUMA node as CPU0.
|
||||
* It is not possible to accurately determine SNC state if the system is
|
||||
* booted with a maxcpus=N parameter. That distorts the ratio of SNC nodes
|
||||
* to L3 caches. It will be OK if system is booted with hyperthreading
|
||||
* disabled (since this doesn't affect the ratio).
|
||||
*/
|
||||
static __init int snc_get_config(void)
|
||||
{
|
||||
struct cacheinfo *ci = get_cpu_cacheinfo_level(0, RESCTRL_L3_CACHE);
|
||||
const cpumask_t *node0_cpumask;
|
||||
int cpus_per_node, cpus_per_l3;
|
||||
int ret;
|
||||
int ret = topology_num_nodes_per_package();
|
||||
|
||||
if (!x86_match_cpu(snc_cpu_ids) || !ci)
|
||||
if (ret > 1 && !x86_match_cpu(snc_cpu_ids)) {
|
||||
pr_warn("CoD enabled system? Resctrl not supported\n");
|
||||
return 1;
|
||||
|
||||
cpus_read_lock();
|
||||
if (num_online_cpus() != num_present_cpus())
|
||||
pr_warn("Some CPUs offline, SNC detection may be incorrect\n");
|
||||
cpus_read_unlock();
|
||||
|
||||
node0_cpumask = cpumask_of_node(cpu_to_node(0));
|
||||
|
||||
cpus_per_node = cpumask_weight(node0_cpumask);
|
||||
cpus_per_l3 = cpumask_weight(&ci->shared_cpu_map);
|
||||
|
||||
if (!cpus_per_node || !cpus_per_l3)
|
||||
return 1;
|
||||
|
||||
ret = cpus_per_l3 / cpus_per_node;
|
||||
}
|
||||
|
||||
/* sanity check: Only valid results are 1, 2, 3, 4, 6 */
|
||||
switch (ret) {
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@
|
|||
#include <asm/mpspec.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/smp.h>
|
||||
#include <asm/numa.h>
|
||||
|
||||
#include "cpu.h"
|
||||
|
||||
|
|
@ -492,11 +493,19 @@ void __init topology_init_possible_cpus(void)
|
|||
set_nr_cpu_ids(allowed);
|
||||
|
||||
cnta = domain_weight(TOPO_PKG_DOMAIN);
|
||||
cntb = domain_weight(TOPO_DIE_DOMAIN);
|
||||
__max_logical_packages = cnta;
|
||||
|
||||
pr_info("Max. logical packages: %3u\n", __max_logical_packages);
|
||||
|
||||
cntb = num_phys_nodes();
|
||||
__num_nodes_per_package = DIV_ROUND_UP(cntb, cnta);
|
||||
|
||||
pr_info("Max. logical nodes: %3u\n", cntb);
|
||||
pr_info("Num. nodes per package:%3u\n", __num_nodes_per_package);
|
||||
|
||||
cntb = domain_weight(TOPO_DIE_DOMAIN);
|
||||
__max_dies_per_package = 1U << (get_count_order(cntb) - get_count_order(cnta));
|
||||
|
||||
pr_info("Max. logical packages: %3u\n", cnta);
|
||||
pr_info("Max. logical dies: %3u\n", cntb);
|
||||
pr_info("Max. dies per package: %3u\n", __max_dies_per_package);
|
||||
|
||||
|
|
|
|||
|
|
@ -468,13 +468,6 @@ static int x86_cluster_flags(void)
|
|||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Set if a package/die has multiple NUMA nodes inside.
|
||||
* AMD Magny-Cours, Intel Cluster-on-Die, and Intel
|
||||
* Sub-NUMA Clustering have this.
|
||||
*/
|
||||
static bool x86_has_numa_in_package;
|
||||
|
||||
static struct sched_domain_topology_level x86_topology[] = {
|
||||
SDTL_INIT(tl_smt_mask, cpu_smt_flags, SMT),
|
||||
#ifdef CONFIG_SCHED_CLUSTER
|
||||
|
|
@ -496,7 +489,7 @@ static void __init build_sched_topology(void)
|
|||
* PKG domain since the NUMA domains will auto-magically create the
|
||||
* right spanning domains based on the SLIT.
|
||||
*/
|
||||
if (x86_has_numa_in_package) {
|
||||
if (topology_num_nodes_per_package() > 1) {
|
||||
unsigned int pkgdom = ARRAY_SIZE(x86_topology) - 2;
|
||||
|
||||
memset(&x86_topology[pkgdom], 0, sizeof(x86_topology[pkgdom]));
|
||||
|
|
@ -513,33 +506,149 @@ static void __init build_sched_topology(void)
|
|||
}
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
static int sched_avg_remote_distance;
|
||||
static int avg_remote_numa_distance(void)
|
||||
/*
|
||||
* Test if the on-trace cluster at (N,N) is symmetric.
|
||||
* Uses upper triangle iteration to avoid obvious duplicates.
|
||||
*/
|
||||
static bool slit_cluster_symmetric(int N)
|
||||
{
|
||||
int i, j;
|
||||
int distance, nr_remote, total_distance;
|
||||
int u = topology_num_nodes_per_package();
|
||||
|
||||
if (sched_avg_remote_distance > 0)
|
||||
return sched_avg_remote_distance;
|
||||
|
||||
nr_remote = 0;
|
||||
total_distance = 0;
|
||||
for_each_node_state(i, N_CPU) {
|
||||
for_each_node_state(j, N_CPU) {
|
||||
distance = node_distance(i, j);
|
||||
|
||||
if (distance >= REMOTE_DISTANCE) {
|
||||
nr_remote++;
|
||||
total_distance += distance;
|
||||
}
|
||||
for (int k = 0; k < u; k++) {
|
||||
for (int l = k; l < u; l++) {
|
||||
if (node_distance(N + k, N + l) !=
|
||||
node_distance(N + l, N + k))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (nr_remote)
|
||||
sched_avg_remote_distance = total_distance / nr_remote;
|
||||
else
|
||||
sched_avg_remote_distance = REMOTE_DISTANCE;
|
||||
|
||||
return sched_avg_remote_distance;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the package-id of the cluster, or ~0 if indeterminate.
|
||||
* Each node in the on-trace cluster should have the same package-id.
|
||||
*/
|
||||
static u32 slit_cluster_package(int N)
|
||||
{
|
||||
int u = topology_num_nodes_per_package();
|
||||
u32 pkg_id = ~0;
|
||||
|
||||
for (int n = 0; n < u; n++) {
|
||||
const struct cpumask *cpus = cpumask_of_node(N + n);
|
||||
int cpu;
|
||||
|
||||
for_each_cpu(cpu, cpus) {
|
||||
u32 id = topology_logical_package_id(cpu);
|
||||
|
||||
if (pkg_id == ~0)
|
||||
pkg_id = id;
|
||||
if (pkg_id != id)
|
||||
return ~0;
|
||||
}
|
||||
}
|
||||
|
||||
return pkg_id;
|
||||
}
|
||||
|
||||
/*
|
||||
* Validate the SLIT table is of the form expected for SNC, specifically:
|
||||
*
|
||||
* - each on-trace cluster should be symmetric,
|
||||
* - each on-trace cluster should have a unique package-id.
|
||||
*
|
||||
* If you NUMA_EMU on top of SNC, you get to keep the pieces.
|
||||
*/
|
||||
static bool slit_validate(void)
|
||||
{
|
||||
int u = topology_num_nodes_per_package();
|
||||
u32 pkg_id, prev_pkg_id = ~0;
|
||||
|
||||
for (int pkg = 0; pkg < topology_max_packages(); pkg++) {
|
||||
int n = pkg * u;
|
||||
|
||||
/*
|
||||
* Ensure the on-trace cluster is symmetric and each cluster
|
||||
* has a different package id.
|
||||
*/
|
||||
if (!slit_cluster_symmetric(n))
|
||||
return false;
|
||||
pkg_id = slit_cluster_package(n);
|
||||
if (pkg_id == ~0)
|
||||
return false;
|
||||
if (pkg && pkg_id == prev_pkg_id)
|
||||
return false;
|
||||
|
||||
prev_pkg_id = pkg_id;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute a sanitized SLIT table for SNC; notably SNC-3 can end up with
|
||||
* asymmetric off-trace clusters, reflecting physical asymmetries. However
|
||||
* this leads to 'unfortunate' sched_domain configurations.
|
||||
*
|
||||
* For example dual socket GNR with SNC-3:
|
||||
*
|
||||
* node distances:
|
||||
* node 0 1 2 3 4 5
|
||||
* 0: 10 15 17 21 28 26
|
||||
* 1: 15 10 15 23 26 23
|
||||
* 2: 17 15 10 26 23 21
|
||||
* 3: 21 28 26 10 15 17
|
||||
* 4: 23 26 23 15 10 15
|
||||
* 5: 26 23 21 17 15 10
|
||||
*
|
||||
* Fix things up by averaging out the off-trace clusters; resulting in:
|
||||
*
|
||||
* node 0 1 2 3 4 5
|
||||
* 0: 10 15 17 24 24 24
|
||||
* 1: 15 10 15 24 24 24
|
||||
* 2: 17 15 10 24 24 24
|
||||
* 3: 24 24 24 10 15 17
|
||||
* 4: 24 24 24 15 10 15
|
||||
* 5: 24 24 24 17 15 10
|
||||
*/
|
||||
static int slit_cluster_distance(int i, int j)
|
||||
{
|
||||
static int slit_valid = -1;
|
||||
int u = topology_num_nodes_per_package();
|
||||
long d = 0;
|
||||
int x, y;
|
||||
|
||||
if (slit_valid < 0) {
|
||||
slit_valid = slit_validate();
|
||||
if (!slit_valid)
|
||||
pr_err(FW_BUG "SLIT table doesn't have the expected form for SNC -- fixup disabled!\n");
|
||||
else
|
||||
pr_info("Fixing up SNC SLIT table.\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* Is this a unit cluster on the trace?
|
||||
*/
|
||||
if ((i / u) == (j / u) || !slit_valid)
|
||||
return node_distance(i, j);
|
||||
|
||||
/*
|
||||
* Off-trace cluster.
|
||||
*
|
||||
* Notably average out the symmetric pair of off-trace clusters to
|
||||
* ensure the resulting SLIT table is symmetric.
|
||||
*/
|
||||
x = i - (i % u);
|
||||
y = j - (j % u);
|
||||
|
||||
for (i = x; i < x + u; i++) {
|
||||
for (j = y; j < y + u; j++) {
|
||||
d += node_distance(i, j);
|
||||
d += node_distance(j, i);
|
||||
}
|
||||
}
|
||||
|
||||
return d / (2*u*u);
|
||||
}
|
||||
|
||||
int arch_sched_node_distance(int from, int to)
|
||||
|
|
@ -549,34 +658,14 @@ int arch_sched_node_distance(int from, int to)
|
|||
switch (boot_cpu_data.x86_vfm) {
|
||||
case INTEL_GRANITERAPIDS_X:
|
||||
case INTEL_ATOM_DARKMONT_X:
|
||||
|
||||
if (!x86_has_numa_in_package || topology_max_packages() == 1 ||
|
||||
d < REMOTE_DISTANCE)
|
||||
if (topology_max_packages() == 1 ||
|
||||
topology_num_nodes_per_package() < 3)
|
||||
return d;
|
||||
|
||||
/*
|
||||
* With SNC enabled, there could be too many levels of remote
|
||||
* NUMA node distances, creating NUMA domain levels
|
||||
* including local nodes and partial remote nodes.
|
||||
*
|
||||
* Trim finer distance tuning for NUMA nodes in remote package
|
||||
* for the purpose of building sched domains. Group NUMA nodes
|
||||
* in the remote package in the same sched group.
|
||||
* Simplify NUMA domains and avoid extra NUMA levels including
|
||||
* different remote NUMA nodes and local nodes.
|
||||
*
|
||||
* GNR and CWF don't expect systems with more than 2 packages
|
||||
* and more than 2 hops between packages. Single average remote
|
||||
* distance won't be appropriate if there are more than 2
|
||||
* packages as average distance to different remote packages
|
||||
* could be different.
|
||||
* Handle SNC-3 asymmetries.
|
||||
*/
|
||||
WARN_ONCE(topology_max_packages() > 2,
|
||||
"sched: Expect only up to 2 packages for GNR or CWF, "
|
||||
"but saw %d packages when building sched domains.",
|
||||
topology_max_packages());
|
||||
|
||||
d = avg_remote_numa_distance();
|
||||
return slit_cluster_distance(from, to);
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
|
@ -606,7 +695,7 @@ void set_cpu_sibling_map(int cpu)
|
|||
o = &cpu_data(i);
|
||||
|
||||
if (match_pkg(c, o) && !topology_same_node(c, o))
|
||||
x86_has_numa_in_package = true;
|
||||
WARN_ON_ONCE(topology_num_nodes_per_package() == 1);
|
||||
|
||||
if ((i == cpu) || (has_smt && match_smt(c, o)))
|
||||
link_mask(topology_sibling_cpumask, cpu, i);
|
||||
|
|
|
|||
|
|
@ -48,6 +48,8 @@ s16 __apicid_to_node[MAX_LOCAL_APIC] = {
|
|||
[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
|
||||
};
|
||||
|
||||
nodemask_t numa_phys_nodes_parsed __initdata;
|
||||
|
||||
int numa_cpu_node(int cpu)
|
||||
{
|
||||
u32 apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
|
||||
|
|
@ -57,6 +59,11 @@ int numa_cpu_node(int cpu)
|
|||
return NUMA_NO_NODE;
|
||||
}
|
||||
|
||||
int __init num_phys_nodes(void)
|
||||
{
|
||||
return bitmap_weight(numa_phys_nodes_parsed.bits, MAX_NUMNODES);
|
||||
}
|
||||
|
||||
cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
|
||||
EXPORT_SYMBOL(node_to_cpumask_map);
|
||||
|
||||
|
|
@ -210,6 +217,7 @@ static int __init dummy_numa_init(void)
|
|||
0LLU, PFN_PHYS(max_pfn) - 1);
|
||||
|
||||
node_set(0, numa_nodes_parsed);
|
||||
node_set(0, numa_phys_nodes_parsed);
|
||||
numa_add_memblk(0, 0, PFN_PHYS(max_pfn));
|
||||
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
|
|||
}
|
||||
set_apicid_to_node(apic_id, node);
|
||||
node_set(node, numa_nodes_parsed);
|
||||
node_set(node, numa_phys_nodes_parsed);
|
||||
pr_debug("SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", pxm, apic_id, node);
|
||||
}
|
||||
|
||||
|
|
@ -97,6 +98,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
|
|||
|
||||
set_apicid_to_node(apic_id, node);
|
||||
node_set(node, numa_nodes_parsed);
|
||||
node_set(node, numa_phys_nodes_parsed);
|
||||
pr_debug("SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", pxm, apic_id, node);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue