linux/include/trace/events/pci.h
Shuai Xue d4318c1a79 PCI: trace: Add RAS tracepoint to monitor link speed changes
PCIe link speed degradation directly impacts system performance and often
indicates hardware issues such as faulty devices, physical layer problems,
or configuration errors.

To this end, add a RAS tracepoint to monitor link speed changes, enabling
proactive health checks and diagnostic analysis.

The following output is generated when a device is hotplugged:

  $ echo 1 > /sys/kernel/debug/tracing/events/pci/pcie_link_event/enable
  $ cat /sys/kernel/debug/tracing/trace_pipe
     irq/51-pciehp-88      [001] .....   381.545386: pcie_link_event: 0000:00:02.0 type:4, reason:4, cur_bus_speed:20, max_bus_speed:23, width:1, flit_mode:0, status:DLLLA

Suggested-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Suggested-by: Matthew W Carlis <mattc@purestorage.com>
Suggested-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Shuai Xue <xueshuai@linux.alibaba.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://patch.msgid.link/20251210132907.58799-3-xueshuai@linux.alibaba.com
2025-12-23 16:06:00 -06:00

129 lines
3.4 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM pci
#if !defined(_TRACE_HW_EVENT_PCI_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_HW_EVENT_PCI_H
#include <uapi/linux/pci_regs.h>
#include <linux/tracepoint.h>
/*
 * Table of PCI hotplug events: every entry pairs an event constant with the
 * label used for it. All entries but the last are wrapped in EM(); the last
 * one uses EMe(), which lets an expansion treat the final entry differently
 * (e.g. omit a trailing separator). The table is expanded more than once in
 * this header, with EM()/EMe() redefined before each expansion.
 */
#define PCI_HOTPLUG_EVENT						\
	EM(PCI_HOTPLUG_LINK_UP,			"LINK_UP")		\
	EM(PCI_HOTPLUG_LINK_DOWN,		"LINK_DOWN")		\
	EM(PCI_HOTPLUG_CARD_PRESENT,		"CARD_PRESENT")		\
	EMe(PCI_HOTPLUG_CARD_NOT_PRESENT,	"CARD_NOT_PRESENT")
/*
 * Enums require being exported to userspace, for user tool parsing.
 *
 * First expansion of the event table: both EM() and EMe() ignore the label
 * and expand to TRACE_DEFINE_ENUM() on the event constant, so expanding
 * PCI_HOTPLUG_EVENT below emits one TRACE_DEFINE_ENUM() per table entry.
 */
#undef EM
#undef EMe
#define EM(a, b) TRACE_DEFINE_ENUM(a);
#define EMe(a, b) TRACE_DEFINE_ENUM(a);
PCI_HOTPLUG_EVENT
/*
 * Now redefine the EM() and EMe() macros to map the enums to the strings
 * that will be printed in the output.
 *
 * Each table entry becomes a {value, "label"} initializer. EM() appends a
 * comma and EMe() (used for the last entry) does not, so PCI_HOTPLUG_EVENT
 * expands to a comma-separated list that can be handed to
 * __print_symbolic().
 */
#undef EM
#undef EMe
#define EM(a, b) {a, b},
#define EMe(a, b) {a, b}
/*
 * pci_hp_event - record a PCI hotplug event
 * @port_name: string identifying the port; copied into the event record
 * @slot:      string identifying the slot; copied into the event record
 * @event:     event code; printed through the PCI_HOTPLUG_EVENT label table
 *
 * Note: For generic PCI hotplug events, we pass already-resolved strings
 * (port_name, slot) instead of driver-specific structures like 'struct
 * controller'. This is because different PCI hotplug drivers (pciehp, cpqphp,
 * ibmphp, shpchp) define their own versions of 'struct controller' with
 * different fields and helper functions. Using driver-specific structures would
 * make the tracepoint interface non-generic and cause compatibility issues
 * across different drivers.
 */
TRACE_EVENT(pci_hp_event,

	TP_PROTO(const char *port_name,
		 const char *slot,
		 const int event),

	TP_ARGS(port_name, slot, event),

	TP_STRUCT__entry(
		__string(	port_name,	port_name	)
		__string(	slot,		slot		)
		__field(	int,		event		)
	),

	TP_fast_assign(
		__assign_str(port_name);
		__assign_str(slot);
		__entry->event = event;
	),

	/*
	 * No trailing "\n" here: the trace output code already terminates
	 * every event line, so an explicit newline would add a blank line.
	 */
	TP_printk("%s slot:%s, event:%s",
		__get_str(port_name),
		__get_str(slot),
		__print_symbolic(__entry->event, PCI_HOTPLUG_EVENT)
	)
);
/*
 * Union of the four Link Status bits that LNKSTA_FLAGS has names for
 * (LT, DLLLA, LBMS, LABS).
 * NOTE(review): not used in this header; presumably callers mask the raw
 * Link Status value with it before passing 'status' to pcie_link_event --
 * confirm against the call sites.
 */
#define PCI_EXP_LNKSTA_LINK_STATUS_MASK		\
	(PCI_EXP_LNKSTA_LT	|		\
	 PCI_EXP_LNKSTA_DLLLA	|		\
	 PCI_EXP_LNKSTA_LBMS	|		\
	 PCI_EXP_LNKSTA_LABS)
/*
 * {bit, "name"} pairs used to decode the link status value with
 * __print_flags(). The entry order is kept exactly as it was, since it
 * determines the order of the table handed to __print_flags().
 */
#define LNKSTA_FLAGS					\
	{ PCI_EXP_LNKSTA_LT,	"LT"	},		\
	{ PCI_EXP_LNKSTA_DLLLA,	"DLLLA"	},		\
	{ PCI_EXP_LNKSTA_LBMS,	"LBMS"	},		\
	{ PCI_EXP_LNKSTA_LABS,	"LABS"	}
/*
 * pcie_link_event - record the link parameters associated with a PCI bus
 * @bus:    bus being reported. bus->self is dereferenced unconditionally (for
 *          the device name and the PCIe port type), so it must not be NULL.
 *          cur_bus_speed, max_bus_speed and flit_mode are read from @bus.
 * @reason: caller-supplied reason code, recorded and printed as a number
 * @width:  link width, recorded as passed in
 * @status: link status bits, printed as a '|'-separated list of the names
 *          in LNKSTA_FLAGS
 */
TRACE_EVENT(pcie_link_event,

	TP_PROTO(struct pci_bus *bus,
		 unsigned int reason,
		 unsigned int width,
		 unsigned int status
		),

	TP_ARGS(bus, reason, width, status),

	TP_STRUCT__entry(
		__string(	port_name,	pci_name(bus->self))
		__field(	unsigned int,	type		)
		__field(	unsigned int,	reason		)
		__field(	unsigned int,	cur_bus_speed	)
		__field(	unsigned int,	max_bus_speed	)
		__field(	unsigned int,	width		)
		__field(	unsigned int,	flit_mode	)
		__field(	unsigned int,	link_status	)
	),

	TP_fast_assign(
		__assign_str(port_name);
		__entry->type		= pci_pcie_type(bus->self);
		__entry->reason		= reason;
		__entry->cur_bus_speed	= bus->cur_bus_speed;
		__entry->max_bus_speed	= bus->max_bus_speed;
		__entry->width		= width;
		__entry->flit_mode	= bus->flit_mode;
		__entry->link_status	= status;
	),

	/*
	 * All numeric fields are unsigned int, hence %u throughout. No
	 * trailing "\n": the trace output code already terminates every
	 * event line, so an explicit newline would add a blank line.
	 */
	TP_printk("%s type:%u, reason:%u, cur_bus_speed:%u, max_bus_speed:%u, width:%u, flit_mode:%u, status:%s",
		__get_str(port_name),
		__entry->type,
		__entry->reason,
		__entry->cur_bus_speed,
		__entry->max_bus_speed,
		__entry->width,
		__entry->flit_mode,
		__print_flags((unsigned long)__entry->link_status, "|",
			      LNKSTA_FLAGS)
	)
);
#endif /* _TRACE_HW_EVENT_PCI_H */
/* This part must be outside protection */
#include <trace/define_trace.h>