mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 03:24:45 +01:00
Merge branch 'for-7.0/cxl-prm-translation' into cxl-for-next
Add support for normalized CXL address translation through ACPI PRM method to support AMD Zen5 platforms. Including a conventions doc that explains how the translation is implemented and for future implementations that need such setup to comply with the current implementation method. cxl: Disable HPA/SPA translation handlers for Normalized Addressing cxl/region: Factor out code into cxl_region_setup_poison() cxl/atl: Lock decoders that need address translation cxl: Enable AMD Zen5 address translation using ACPI PRMT cxl/acpi: Prepare use of EFI runtime services cxl: Introduce callback for HPA address ranges translation cxl/region: Use region data to get the root decoder cxl/region: Add @hpa_range argument to function cxl_calc_interleave_pos() cxl/region: Separate region parameter setup and region construction cxl: Simplify cxl_root_ops allocation and handling cxl/region: Store HPA range in struct cxl_region cxl/region: Store root decoder in struct cxl_region cxl/region: Rename misleading variable name @hpa to @hpa_range Documentation/driver-api/cxl: ACPI PRM Address Translation Support and AMD Zen5 enablement cxl, doc: Moving conventions in separate files cxl, doc: Remove isonum.txt inclusion
This commit is contained in:
commit
63fbf275fa
14 changed files with 911 additions and 291 deletions
|
|
@ -1,9 +1,7 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
=======================================
|
||||
Compute Express Link: Linux Conventions
|
||||
=======================================
|
||||
#######################################
|
||||
|
||||
There exist shipping platforms that bend or break CXL specification
|
||||
expectations. Record the details and the rationale for those deviations.
|
||||
|
|
@ -11,172 +9,10 @@ Borrow the ACPI Code First template format to capture the assumptions
|
|||
and tradeoffs such that multiple platform implementations can follow the
|
||||
same convention.
|
||||
|
||||
<(template) Title>
|
||||
==================
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:caption: Contents
|
||||
|
||||
Document
|
||||
--------
|
||||
CXL Revision <rev>, Version <ver>
|
||||
|
||||
License
|
||||
-------
|
||||
SPDX-License Identifier: CC-BY-4.0
|
||||
|
||||
Creator/Contributors
|
||||
--------------------
|
||||
|
||||
Summary of the Change
|
||||
---------------------
|
||||
|
||||
<Detail the conflict with the specification and where available the
|
||||
assumptions and tradeoffs taken by the hardware platform.>
|
||||
|
||||
|
||||
Benefits of the Change
|
||||
----------------------
|
||||
|
||||
<Detail what happens if platforms and Linux do not adopt this
|
||||
convention.>
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
Detailed Description of the Change
|
||||
----------------------------------
|
||||
|
||||
<Propose spec language that corrects the conflict.>
|
||||
|
||||
|
||||
Resolve conflict between CFMWS, Platform Memory Holes, and Endpoint Decoders
|
||||
============================================================================
|
||||
|
||||
Document
|
||||
--------
|
||||
|
||||
CXL Revision 3.2, Version 1.0
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
SPDX-License Identifier: CC-BY-4.0
|
||||
|
||||
Creator/Contributors
|
||||
--------------------
|
||||
|
||||
- Fabio M. De Francesco, Intel
|
||||
- Dan J. Williams, Intel
|
||||
- Mahesh Natu, Intel
|
||||
|
||||
Summary of the Change
|
||||
---------------------
|
||||
|
||||
According to the current Compute Express Link (CXL) Specifications (Revision
|
||||
3.2, Version 1.0), the CXL Fixed Memory Window Structure (CFMWS) describes zero
|
||||
or more Host Physical Address (HPA) windows associated with each CXL Host
|
||||
Bridge. Each window represents a contiguous HPA range that may be interleaved
|
||||
across one or more targets, including CXL Host Bridges. Each window has a set
|
||||
of restrictions that govern its usage. It is the Operating System-directed
|
||||
configuration and Power Management (OSPM) responsibility to utilize each window
|
||||
for the specified use.
|
||||
|
||||
Table 9-22 of the current CXL Specifications states that the Window Size field
|
||||
contains the total number of consecutive bytes of HPA this window describes.
|
||||
This value must be a multiple of the Number of Interleave Ways (NIW) * 256 MB.
|
||||
|
||||
Platform Firmware (BIOS) might reserve physical addresses below 4 GB where a
|
||||
memory gap such as the Low Memory Hole for PCIe MMIO may exist. In such cases,
|
||||
the CFMWS Range Size may not adhere to the NIW * 256 MB rule.
|
||||
|
||||
The HPA represents the actual physical memory address space that the CXL devices
|
||||
can decode and respond to, while the System Physical Address (SPA), a related
|
||||
but distinct concept, represents the system-visible address space that users can
|
||||
direct transactions to, and so it excludes reserved regions.
|
||||
|
||||
BIOS publishes CFMWS to communicate the active SPA ranges that, on platforms
|
||||
with LMH's, map to a strict subset of the HPA. The SPA range trims out the hole,
|
||||
resulting in lost capacity in the Endpoints with no SPA to map to that part of
|
||||
the HPA range that intersects the hole.
|
||||
|
||||
E.g., an x86 platform with two CFMWS and an LMH starting at 2 GB:
|
||||
|
||||
+--------+------------+-------------------+------------------+-------------------+------+
|
||||
| Window | CFMWS Base | CFMWS Size | HDM Decoder Base | HDM Decoder Size | Ways |
|
||||
+========+============+===================+==================+===================+======+
|
||||
| 0 | 0 GB | 2 GB | 0 GB | 3 GB | 12 |
|
||||
+--------+------------+-------------------+------------------+-------------------+------+
|
||||
| 1 | 4 GB | NIW*256MB Aligned | 4 GB | NIW*256MB Aligned | 12 |
|
||||
+--------+------------+-------------------+------------------+-------------------+------+
|
||||
|
||||
HDM decoder base and HDM decoder size represent all the 12 Endpoint Decoders of
|
||||
a 12 ways region and all the intermediate Switch Decoders. They are configured
|
||||
by the BIOS according to the NIW * 256MB rule, resulting in an HPA range size of
|
||||
3GB. Instead, the CFMWS Base and CFMWS Size are used to configure the Root
|
||||
Decoder HPA range that results smaller (2GB) than that of the Switch and
|
||||
Endpoint Decoders in the hierarchy (3GB).
|
||||
|
||||
This creates 2 issues which lead to a failure to construct a region:
|
||||
|
||||
1) A mismatch in region size between root and any HDM decoder. The root decoders
|
||||
will always be smaller due to the trim.
|
||||
|
||||
2) The trim causes the root decoder to violate the (NIW * 256MB) rule.
|
||||
|
||||
This change allows a region with a base address of 0GB to bypass these checks to
|
||||
allow for region creation with the trimmed root decoder address range.
|
||||
|
||||
This change does not allow for any other arbitrary region to violate these
|
||||
checks - it is intended exclusively to enable x86 platforms which map CXL memory
|
||||
under 4GB.
|
||||
|
||||
Despite the HDM decoders covering the PCIe hole HPA region, it is expected that
|
||||
the platform will never route address accesses to the CXL complex because the
|
||||
root decoder only covers the trimmed region (which excludes this). This is
|
||||
outside the ability of Linux to enforce.
|
||||
|
||||
On the example platform, only the first 2GB will be potentially usable, but
|
||||
Linux, aiming to adhere to the current specifications, fails to construct
|
||||
Regions and attach Endpoint and intermediate Switch Decoders to them.
|
||||
|
||||
There are several points of failure due to the expectation that the Root
|
||||
Decoder HPA size, which is equal to the CFMWS from which it is configured, has
|
||||
to be greater or equal to the matching Switch and Endpoint HDM Decoders.
|
||||
|
||||
In order to succeed with construction and attachment, Linux must construct a
|
||||
Region with Root Decoder HPA range size, and then attach to that all the
|
||||
intermediate Switch Decoders and Endpoint Decoders that belong to the hierarchy
|
||||
regardless of their range sizes.
|
||||
|
||||
Benefits of the Change
|
||||
----------------------
|
||||
|
||||
Without the change, the OSPM wouldn't match intermediate Switch and Endpoint
|
||||
Decoders with Root Decoders configured with CFMWS HPA sizes that don't align
|
||||
with the NIW * 256MB constraint, leading to lost memdev capacity.
|
||||
|
||||
This change allows the OSPM to construct Regions and attach intermediate Switch
|
||||
and Endpoint Decoders to them, so that the addressable part of the memory
|
||||
devices total capacity is made available to the users.
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
Compute Express Link Specification Revision 3.2, Version 1.0
|
||||
<https://www.computeexpresslink.org/>
|
||||
|
||||
Detailed Description of the Change
|
||||
----------------------------------
|
||||
|
||||
The description of the Window Size field in table 9-22 needs to account for
|
||||
platforms with Low Memory Holes, where SPA ranges might be subsets of the
|
||||
endpoints HPA. Therefore, it has to be changed to the following:
|
||||
|
||||
"The total number of consecutive bytes of HPA this window represents. This value
|
||||
shall be a multiple of NIW * 256 MB.
|
||||
|
||||
On platforms that reserve physical addresses below 4 GB, such as the Low Memory
|
||||
Hole for PCIe MMIO on x86, an instance of CFMWS whose Base HPA range is 0 might
|
||||
have a size that doesn't align with the NIW * 256 MB constraint.
|
||||
|
||||
Note that the matching intermediate Switch Decoders and the Endpoint Decoders
|
||||
HPA range sizes must still align to the above-mentioned rule, but the memory
|
||||
capacity that exceeds the CFMWS window size won't be accessible.".
|
||||
conventions/cxl-lmh.rst
|
||||
conventions/cxl-atl.rst
|
||||
conventions/template.rst
|
||||
|
|
|
|||
304
Documentation/driver-api/cxl/conventions/cxl-atl.rst
Normal file
304
Documentation/driver-api/cxl/conventions/cxl-atl.rst
Normal file
|
|
@ -0,0 +1,304 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
ACPI PRM CXL Address Translation
|
||||
================================
|
||||
|
||||
Document
|
||||
--------
|
||||
|
||||
CXL Revision 3.2, Version 1.0
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
SPDX-License Identifier: CC-BY-4.0
|
||||
|
||||
Creator/Contributors
|
||||
--------------------
|
||||
|
||||
- Robert Richter, AMD et al.
|
||||
|
||||
Summary of the Change
|
||||
---------------------
|
||||
|
||||
The CXL Fixed Memory Window Structures (CFMWS) describe zero or more Host
|
||||
Physical Address (HPA) windows associated with one or more CXL Host Bridges.
|
||||
Each HPA range of a CXL Host Bridge is represented by a CFMWS entry. An HPA
|
||||
range may include addresses currently assigned to CXL.mem devices, or an OS may
|
||||
assign ranges from an address window to a device.
|
||||
|
||||
Host-managed Device Memory is Device-attached memory that is mapped to system
|
||||
coherent address space and accessible to the Host using standard write-back
|
||||
semantics. The managed address range is configured in the CXL HDM Decoder
|
||||
registers of the device. An HDM Decoder in a device is responsible for
|
||||
converting HPA into DPA by stripping off specific address bits.
|
||||
|
||||
CXL devices and CXL bridges use the same HPA space. It is common across all
|
||||
components that belong to the same host domain. The view of the address region
|
||||
must be consistent on the CXL.mem path between the Host and the Device.
|
||||
|
||||
This is described in the *CXL 3.2 specification* (Table 1-1, 3.3.1,
|
||||
8.2.4.20, 9.13.1, 9.18.1.3). [#cxl-spec-3.2]_
|
||||
|
||||
Depending on the interconnect architecture of the platform, components attached
|
||||
to a host may not share the same host physical address space. Those platforms
|
||||
need address translation to convert an HPA between the host and the attached
|
||||
component, such as a CXL device. The translation mechanism is host-specific and
|
||||
implementation dependent.
|
||||
|
||||
For example, x86 AMD platforms use a Data Fabric that manages access to physical
|
||||
memory. Devices have their own memory space and can be configured to use
|
||||
'Normalized addresses' different from System Physical Addresses (SPA). Address
|
||||
translation is then needed. For details, see
|
||||
:doc:`x86 AMD Address Translation </admin-guide/RAS/address-translation>`.
|
||||
|
||||
Those AMD platforms provide PRM [#prm-spec]_ handlers in firmware to perform
|
||||
various types of address translation, including for CXL endpoints. AMD Zen5
|
||||
systems implement the ACPI PRM CXL Address Translation firmware call. The ACPI
|
||||
PRM handler has a specific GUID to uniquely identify platforms with support for
|
||||
Normalized addressing. This is documented in the *ACPI v6.5 Porting Guide*
|
||||
(Address Translation - CXL DPA to System Physical Address). [#amd-ppr-58088]_
|
||||
|
||||
When in Normalized address mode, HDM decoder address ranges must be configured
|
||||
and handled differently. Hardware addresses used in the HDM decoder
|
||||
configurations of an endpoint are not SPA and need to be translated from the
|
||||
address range of the endpoint to that of the CXL host bridge. This is especially
|
||||
important for finding an endpoint's associated CXL Host Bridge and HPA window
|
||||
described in the CFMWS. Additionally, the interleave decoding is done by the
|
||||
Data Fabric and the endpoint does not perform decoding when converting HPA to
|
||||
DPA. Instead, interleaving is switched off for the endpoint (1-way). Finally,
|
||||
address translation might also be needed to inspect the endpoint's hardware
|
||||
addresses, such as during profiling, tracing, or error handling.
|
||||
|
||||
For example, with Normalized addressing the HDM decoders could look as follows::
|
||||
|
||||
-------------------------------
|
||||
| Root Decoder (CFMWS) |
|
||||
| SPA Range: 0x850000000 |
|
||||
| Size: 0x8000000000 (512 GB) |
|
||||
| Interleave Ways: 1 |
|
||||
-------------------------------
|
||||
|
|
||||
v
|
||||
-------------------------------
|
||||
| Host Bridge Decoder (HDM) |
|
||||
| SPA Range: 0x850000000 |
|
||||
| Size: 0x8000000000 (512 GB) |
|
||||
| Interleave Ways: 4 |
|
||||
| Targets: endpoint5,8,11,13 |
|
||||
| Granularity: 256 |
|
||||
-------------------------------
|
||||
|
|
||||
-----------------------------+------------------------------
|
||||
| | | |
|
||||
v v v v
|
||||
------------------- ------------------- ------------------- -------------------
|
||||
| endpoint5 | | endpoint8 | | endpoint11 | | endpoint13 |
|
||||
| decoder5.0 | | decoder8.0 | | decoder11.0 | | decoder13.0 |
|
||||
| PCIe: | | PCIe: | | PCIe: | | PCIe: |
|
||||
| 0000:e2:00.0 | | 0000:e3:00.0 | | 0000:e4:00.0 | | 0000:e1:00.0 |
|
||||
| DPA: | | DPA: | | DPA: | | DPA: |
|
||||
| Start: 0x0 | | Start: 0x0 | | Start: 0x0 | | Start: 0x0 |
|
||||
| Size: | | Size: | | Size: | | Size: |
|
||||
| 0x2000000000 | | 0x2000000000 | | 0x2000000000 | | 0x2000000000 |
|
||||
| (128 GB) | | (128 GB) | | (128 GB) | | (128 GB) |
|
||||
| Interleaving: | | Interleaving: | | Interleaving: | | Interleaving: |
|
||||
| Ways: 1 | | Ways: 1 | | Ways: 1 | | Ways: 1 |
|
||||
| Gran: 256 | | Gran: 256 | | Gran: 256 | | Gran: 256 |
|
||||
------------------- ------------------- ------------------- -------------------
|
||||
| | | |
|
||||
v v v v
|
||||
DPA DPA DPA DPA
|
||||
|
||||
This shows the representation in sysfs:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
/sys/bus/cxl/devices/endpoint5/decoder5.0/interleave_granularity:256
|
||||
/sys/bus/cxl/devices/endpoint5/decoder5.0/interleave_ways:1
|
||||
/sys/bus/cxl/devices/endpoint5/decoder5.0/size:0x2000000000
|
||||
/sys/bus/cxl/devices/endpoint5/decoder5.0/start:0x0
|
||||
/sys/bus/cxl/devices/endpoint8/decoder8.0/interleave_granularity:256
|
||||
/sys/bus/cxl/devices/endpoint8/decoder8.0/interleave_ways:1
|
||||
/sys/bus/cxl/devices/endpoint8/decoder8.0/size:0x2000000000
|
||||
/sys/bus/cxl/devices/endpoint8/decoder8.0/start:0x0
|
||||
/sys/bus/cxl/devices/endpoint11/decoder11.0/interleave_granularity:256
|
||||
/sys/bus/cxl/devices/endpoint11/decoder11.0/interleave_ways:1
|
||||
/sys/bus/cxl/devices/endpoint11/decoder11.0/size:0x2000000000
|
||||
/sys/bus/cxl/devices/endpoint11/decoder11.0/start:0x0
|
||||
/sys/bus/cxl/devices/endpoint13/decoder13.0/interleave_granularity:256
|
||||
/sys/bus/cxl/devices/endpoint13/decoder13.0/interleave_ways:1
|
||||
/sys/bus/cxl/devices/endpoint13/decoder13.0/size:0x2000000000
|
||||
/sys/bus/cxl/devices/endpoint13/decoder13.0/start:0x0
|
||||
|
||||
Note that the endpoint interleaving configurations use direct mapping (1-way).
|
||||
|
||||
With PRM calls, the kernel can determine the following mappings:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
cxl decoder5.0: address mapping found for 0000:e2:00.0 (hpa -> spa):
|
||||
0x0+0x2000000000 -> 0x850000000+0x8000000000 ways:4 granularity:256
|
||||
cxl decoder8.0: address mapping found for 0000:e3:00.0 (hpa -> spa):
|
||||
0x0+0x2000000000 -> 0x850000000+0x8000000000 ways:4 granularity:256
|
||||
cxl decoder11.0: address mapping found for 0000:e4:00.0 (hpa -> spa):
|
||||
0x0+0x2000000000 -> 0x850000000+0x8000000000 ways:4 granularity:256
|
||||
cxl decoder13.0: address mapping found for 0000:e1:00.0 (hpa -> spa):
|
||||
0x0+0x2000000000 -> 0x850000000+0x8000000000 ways:4 granularity:256
|
||||
|
||||
The corresponding CXL host bridge (HDM) decoders and root decoder (CFMWS) match
|
||||
the calculated endpoint mappings shown:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
/sys/bus/cxl/devices/port1/decoder1.0/interleave_granularity:256
|
||||
/sys/bus/cxl/devices/port1/decoder1.0/interleave_ways:4
|
||||
/sys/bus/cxl/devices/port1/decoder1.0/size:0x8000000000
|
||||
/sys/bus/cxl/devices/port1/decoder1.0/start:0x850000000
|
||||
/sys/bus/cxl/devices/port1/decoder1.0/target_list:0,1,2,3
|
||||
/sys/bus/cxl/devices/port1/decoder1.0/target_type:expander
|
||||
/sys/bus/cxl/devices/root0/decoder0.0/interleave_granularity:256
|
||||
/sys/bus/cxl/devices/root0/decoder0.0/interleave_ways:1
|
||||
/sys/bus/cxl/devices/root0/decoder0.0/size:0x8000000000
|
||||
/sys/bus/cxl/devices/root0/decoder0.0/start:0x850000000
|
||||
/sys/bus/cxl/devices/root0/decoder0.0/target_list:7
|
||||
|
||||
The following changes to the specification are needed:
|
||||
|
||||
* Allow a CXL device to be in an HPA space other than the host's address space.
|
||||
|
||||
* Allow the platform to use implementation-specific address translation when
|
||||
crossing memory domains on the CXL.mem path between the host and the device.
|
||||
|
||||
* Define a PRM handler method for converting device addresses to SPAs.
|
||||
|
||||
* Specify that the platform shall provide the PRM handler method to the
|
||||
Operating System to detect Normalized addressing and for determining Endpoint
|
||||
SPA ranges and interleaving configurations.
|
||||
|
||||
* Add reference to:
|
||||
|
||||
| Platform Runtime Mechanism Specification, Version 1.1 – November 2020
|
||||
| https://uefi.org/sites/default/files/resources/PRM_Platform_Runtime_Mechanism_1_1_release_candidate.pdf
|
||||
|
||||
Benefits of the Change
|
||||
----------------------
|
||||
|
||||
Without the change, the Operating System may be unable to determine the memory
|
||||
region and Root Decoder for an Endpoint and its corresponding HDM decoder.
|
||||
Region creation would fail. Platforms with a different interconnect architecture
|
||||
would fail to set up and use CXL.
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
.. [#cxl-spec-3.2] Compute Express Link Specification, Revision 3.2, Version 1.0,
|
||||
https://www.computeexpresslink.org/
|
||||
|
||||
.. [#amd-ppr-58088] AMD Family 1Ah Models 00h–0Fh and Models 10h–1Fh,
|
||||
ACPI v6.5 Porting Guide, Publication # 58088,
|
||||
https://www.amd.com/en/search/documentation/hub.html
|
||||
|
||||
.. [#prm-spec] Platform Runtime Mechanism, Version: 1.1,
|
||||
https://uefi.org/sites/default/files/resources/PRM_Platform_Runtime_Mechanism_1_1_release_candidate.pdf
|
||||
|
||||
Detailed Description of the Change
|
||||
----------------------------------
|
||||
|
||||
The following describes the necessary changes to the *CXL 3.2 specification*
|
||||
[#cxl-spec-3.2]_:
|
||||
|
||||
Add the following reference to the table:
|
||||
|
||||
Table 1-2. Reference Documents
|
||||
|
||||
+----------------------------+-------------------+---------------------------+
|
||||
| Document | Chapter Reference | Document No./Location |
|
||||
+============================+===================+===========================+
|
||||
| Platform Runtime Mechanism | Chapter 8, 9 | https://www.uefi.org/acpi |
|
||||
| Version: 1.1 | | |
|
||||
+----------------------------+-------------------+---------------------------+
|
||||
|
||||
Add the following paragraphs to the end of the section:
|
||||
|
||||
**8.2.4.20 CXL HDM Decoder Capability Structure**
|
||||
|
||||
"A device may use an HPA space that is not common to other components of the
|
||||
host domain. The platform is responsible for address translation when crossing
|
||||
HPA spaces. The Operating System must determine the interleaving configuration
|
||||
and perform address translation to the HPA ranges of the HDM decoders as needed.
|
||||
The translation mechanism is host-specific and implementation dependent.
|
||||
|
||||
The platform indicates support of independent HPA spaces and the need for
|
||||
address translation by providing a Platform Runtime Mechanism (PRM) handler. The
|
||||
OS shall use that handler to perform the necessary translations from the DPA
|
||||
space to the HPA space. The handler is defined in Section 9.18.4 *PRM Handler
|
||||
for CXL DPA to System Physical Address Translation*."
|
||||
|
||||
Add the following section and sub-section including tables:
|
||||
|
||||
**9.18.4 PRM Handler for CXL DPA to System Physical Address Translation**
|
||||
|
||||
"A platform may be configured to use 'Normalized addresses'. Host physical
|
||||
address (HPA) spaces are component-specific and differ from system physical
|
||||
addresses (SPAs). The endpoint has its own physical address space. All requests
|
||||
presented to the device already use Device Physical Addresses (DPAs). The CXL
|
||||
endpoint decoders have interleaving disabled (1-way interleaving) and the device
|
||||
does not perform HPA decoding to determine a DPA.
|
||||
|
||||
The platform provides a PRM handler for CXL DPA to System Physical Address
|
||||
Translation. The PRM handler translates a Device Physical Address (DPA) to a
|
||||
System Physical Address (SPA) for a specified CXL endpoint. In the address space
|
||||
of the host, SPA and HPA are equivalent, and the OS shall use this handler to
|
||||
determine the HPA that corresponds to a device address, for example when
|
||||
configuring HDM decoders on platforms with Normalized addressing. The GUID and
|
||||
the parameter buffer format of the handler are specified in section 9.18.4.1. If
|
||||
the OS identifies the PRM handler, the platform supports Normalized addressing
|
||||
and the OS must perform DPA address translation as needed."
|
||||
|
||||
**9.18.4.1 PRM Handler Invocation**
|
||||
|
||||
"The OS calls the PRM handler for CXL DPA to System Physical Address Translation
|
||||
using the direct invocation mechanism. Details of calling a PRM handler are
|
||||
described in the Platform Runtime Mechanism (PRM) specification.
|
||||
|
||||
The PRM handler is identified by the following GUID:
|
||||
|
||||
EE41B397-25D4-452C-AD54-48C6E3480B94
|
||||
|
||||
The caller allocates and prepares a Parameter Buffer, then passes the PRM
|
||||
handler GUID and a pointer to the Parameter Buffer to invoke the handler. The
|
||||
Parameter Buffer is described in Table 9-32."
|
||||
|
||||
**Table 9-32. PRM Parameter Buffer used for CXL DPA to System Physical Address Translation**
|
||||
|
||||
+-------------+-----------+------------------------------------------------------------------------+
|
||||
| Byte Offset | Length in | Description |
|
||||
| | Bytes | |
|
||||
+=============+===========+========================================================================+
|
||||
| 00h | 8 | **CXL Device Physical Address (DPA)**: CXL DPA (e.g., from |
|
||||
| | | CXL Component Event Log) |
|
||||
+-------------+-----------+------------------------------------------------------------------------+
|
||||
| 08h | 4 | **CXL Endpoint SBDF**: |
|
||||
| | | |
|
||||
| | | - Byte 3 - PCIe Segment |
|
||||
| | | - Byte 2 - Bus Number |
|
||||
| | | - Byte 1: |
|
||||
| | | - Device Number Bits[7:3] |
|
||||
| | | - Function Number Bits[2:0] |
|
||||
| | | - Byte 0 - RESERVED (MBZ) |
|
||||
| | | |
|
||||
+-------------+-----------+------------------------------------------------------------------------+
|
||||
| 0Ch | 8 | **Output Buffer**: Virtual Address Pointer to the buffer, |
|
||||
| | | as defined in Table 9-33. |
|
||||
+-------------+-----------+------------------------------------------------------------------------+
|
||||
|
||||
**Table 9-33. PRM Output Buffer used for CXL DPA to System Physical Address Translation**
|
||||
|
||||
+-------------+-----------+------------------------------------------------------------------------+
|
||||
| Byte Offset | Length in | Description |
|
||||
| | Bytes | |
|
||||
+=============+===========+========================================================================+
|
||||
| 00h | 8 | **System Physical Address (SPA)**: The SPA converted |
|
||||
| | | from the CXL DPA. |
|
||||
+-------------+-----------+------------------------------------------------------------------------+
|
||||
135
Documentation/driver-api/cxl/conventions/cxl-lmh.rst
Normal file
135
Documentation/driver-api/cxl/conventions/cxl-lmh.rst
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
Resolve conflict between CFMWS, Platform Memory Holes, and Endpoint Decoders
|
||||
============================================================================
|
||||
|
||||
Document
|
||||
--------
|
||||
|
||||
CXL Revision 3.2, Version 1.0
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
SPDX-License Identifier: CC-BY-4.0
|
||||
|
||||
Creator/Contributors
|
||||
--------------------
|
||||
|
||||
- Fabio M. De Francesco, Intel
|
||||
- Dan J. Williams, Intel
|
||||
- Mahesh Natu, Intel
|
||||
|
||||
Summary of the Change
|
||||
---------------------
|
||||
|
||||
According to the current Compute Express Link (CXL) Specifications (Revision
|
||||
3.2, Version 1.0), the CXL Fixed Memory Window Structure (CFMWS) describes zero
|
||||
or more Host Physical Address (HPA) windows associated with each CXL Host
|
||||
Bridge. Each window represents a contiguous HPA range that may be interleaved
|
||||
across one or more targets, including CXL Host Bridges. Each window has a set
|
||||
of restrictions that govern its usage. It is the Operating System-directed
|
||||
configuration and Power Management (OSPM) responsibility to utilize each window
|
||||
for the specified use.
|
||||
|
||||
Table 9-22 of the current CXL Specifications states that the Window Size field
|
||||
contains the total number of consecutive bytes of HPA this window describes.
|
||||
This value must be a multiple of the Number of Interleave Ways (NIW) * 256 MB.
|
||||
|
||||
Platform Firmware (BIOS) might reserve physical addresses below 4 GB where a
|
||||
memory gap such as the Low Memory Hole for PCIe MMIO may exist. In such cases,
|
||||
the CFMWS Range Size may not adhere to the NIW * 256 MB rule.
|
||||
|
||||
The HPA represents the actual physical memory address space that the CXL devices
|
||||
can decode and respond to, while the System Physical Address (SPA), a related
|
||||
but distinct concept, represents the system-visible address space that users can
|
||||
direct transactions to, and so it excludes reserved regions.
|
||||
|
||||
BIOS publishes CFMWS to communicate the active SPA ranges that, on platforms
|
||||
with LMH's, map to a strict subset of the HPA. The SPA range trims out the hole,
|
||||
resulting in lost capacity in the Endpoints with no SPA to map to that part of
|
||||
the HPA range that intersects the hole.
|
||||
|
||||
E.g., an x86 platform with two CFMWS and an LMH starting at 2 GB:
|
||||
|
||||
+--------+------------+-------------------+------------------+-------------------+------+
|
||||
| Window | CFMWS Base | CFMWS Size | HDM Decoder Base | HDM Decoder Size | Ways |
|
||||
+========+============+===================+==================+===================+======+
|
||||
| 0 | 0 GB | 2 GB | 0 GB | 3 GB | 12 |
|
||||
+--------+------------+-------------------+------------------+-------------------+------+
|
||||
| 1 | 4 GB | NIW*256MB Aligned | 4 GB | NIW*256MB Aligned | 12 |
|
||||
+--------+------------+-------------------+------------------+-------------------+------+
|
||||
|
||||
HDM decoder base and HDM decoder size represent all the 12 Endpoint Decoders of
|
||||
a 12 ways region and all the intermediate Switch Decoders. They are configured
|
||||
by the BIOS according to the NIW * 256MB rule, resulting in an HPA range size of
|
||||
3GB. Instead, the CFMWS Base and CFMWS Size are used to configure the Root
|
||||
Decoder HPA range that results smaller (2GB) than that of the Switch and
|
||||
Endpoint Decoders in the hierarchy (3GB).
|
||||
|
||||
This creates 2 issues which lead to a failure to construct a region:
|
||||
|
||||
1) A mismatch in region size between root and any HDM decoder. The root decoders
|
||||
will always be smaller due to the trim.
|
||||
|
||||
2) The trim causes the root decoder to violate the (NIW * 256MB) rule.
|
||||
|
||||
This change allows a region with a base address of 0GB to bypass these checks to
|
||||
allow for region creation with the trimmed root decoder address range.
|
||||
|
||||
This change does not allow for any other arbitrary region to violate these
|
||||
checks - it is intended exclusively to enable x86 platforms which map CXL memory
|
||||
under 4GB.
|
||||
|
||||
Despite the HDM decoders covering the PCIe hole HPA region, it is expected that
|
||||
the platform will never route address accesses to the CXL complex because the
|
||||
root decoder only covers the trimmed region (which excludes this). This is
|
||||
outside the ability of Linux to enforce.
|
||||
|
||||
On the example platform, only the first 2GB will be potentially usable, but
|
||||
Linux, aiming to adhere to the current specifications, fails to construct
|
||||
Regions and attach Endpoint and intermediate Switch Decoders to them.
|
||||
|
||||
There are several points of failure due to the expectation that the Root
|
||||
Decoder HPA size, which is equal to the CFMWS from which it is configured, has
|
||||
to be greater or equal to the matching Switch and Endpoint HDM Decoders.
|
||||
|
||||
In order to succeed with construction and attachment, Linux must construct a
|
||||
Region with Root Decoder HPA range size, and then attach to that all the
|
||||
intermediate Switch Decoders and Endpoint Decoders that belong to the hierarchy
|
||||
regardless of their range sizes.
|
||||
|
||||
Benefits of the Change
|
||||
----------------------
|
||||
|
||||
Without the change, the OSPM wouldn't match intermediate Switch and Endpoint
|
||||
Decoders with Root Decoders configured with CFMWS HPA sizes that don't align
|
||||
with the NIW * 256MB constraint, leading to lost memdev capacity.
|
||||
|
||||
This change allows the OSPM to construct Regions and attach intermediate Switch
|
||||
and Endpoint Decoders to them, so that the addressable part of the memory
|
||||
devices total capacity is made available to the users.
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
Compute Express Link Specification Revision 3.2, Version 1.0
|
||||
<https://www.computeexpresslink.org/>
|
||||
|
||||
Detailed Description of the Change
|
||||
----------------------------------
|
||||
|
||||
The description of the Window Size field in table 9-22 needs to account for
|
||||
platforms with Low Memory Holes, where SPA ranges might be subsets of the
|
||||
endpoints HPA. Therefore, it has to be changed to the following:
|
||||
|
||||
"The total number of consecutive bytes of HPA this window represents. This value
|
||||
shall be a multiple of NIW * 256 MB.
|
||||
|
||||
On platforms that reserve physical addresses below 4 GB, such as the Low Memory
|
||||
Hole for PCIe MMIO on x86, an instance of CFMWS whose Base HPA range is 0 might
|
||||
have a size that doesn't align with the NIW * 256 MB constraint.
|
||||
|
||||
Note that the matching intermediate Switch Decoders and the Endpoint Decoders
|
||||
HPA range sizes must still align to the above-mentioned rule, but the memory
|
||||
capacity that exceeds the CFMWS window size won't be accessible.".
|
||||
37
Documentation/driver-api/cxl/conventions/template.rst
Normal file
37
Documentation/driver-api/cxl/conventions/template.rst
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
.. :: Template Title here:
|
||||
|
||||
Template File
|
||||
=============
|
||||
|
||||
Document
|
||||
--------
|
||||
CXL Revision <rev>, Version <ver>
|
||||
|
||||
License
|
||||
-------
|
||||
SPDX-License Identifier: CC-BY-4.0
|
||||
|
||||
Creator/Contributors
|
||||
--------------------
|
||||
|
||||
Summary of the Change
|
||||
---------------------
|
||||
|
||||
<Detail the conflict with the specification and where available the
|
||||
assumptions and tradeoffs taken by the hardware platform.>
|
||||
|
||||
Benefits of the Change
|
||||
----------------------
|
||||
|
||||
<Detail what happens if platforms and Linux do not adopt this
|
||||
convention.>
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
Detailed Description of the Change
|
||||
----------------------------------
|
||||
|
||||
<Propose spec language that corrects the conflict.>
|
||||
|
|
@ -237,4 +237,9 @@ config CXL_RAS
|
|||
def_bool y
|
||||
depends on ACPI_APEI_GHES && PCIEAER && CXL_BUS
|
||||
|
||||
config CXL_ATL
|
||||
def_bool y
|
||||
depends on CXL_REGION
|
||||
depends on ACPI_PRMT && AMD_NB
|
||||
|
||||
endif
|
||||
|
|
|
|||
|
|
@ -318,10 +318,6 @@ static int cxl_acpi_qos_class(struct cxl_root *cxl_root,
|
|||
return cxl_acpi_evaluate_qtg_dsm(handle, coord, entries, qos_class);
|
||||
}
|
||||
|
||||
static const struct cxl_root_ops acpi_root_ops = {
|
||||
.qos_class = cxl_acpi_qos_class,
|
||||
};
|
||||
|
||||
static void del_cxl_resource(struct resource *res)
|
||||
{
|
||||
if (!res)
|
||||
|
|
@ -904,11 +900,14 @@ static int cxl_acpi_probe(struct platform_device *pdev)
|
|||
cxl_res->end = -1;
|
||||
cxl_res->flags = IORESOURCE_MEM;
|
||||
|
||||
cxl_root = devm_cxl_add_root(host, &acpi_root_ops);
|
||||
cxl_root = devm_cxl_add_root(host);
|
||||
if (IS_ERR(cxl_root))
|
||||
return PTR_ERR(cxl_root);
|
||||
cxl_root->ops.qos_class = cxl_acpi_qos_class;
|
||||
root_port = &cxl_root->port;
|
||||
|
||||
cxl_setup_prm_address_translation(cxl_root);
|
||||
|
||||
rc = bus_for_each_dev(adev->dev.bus, NULL, root_port,
|
||||
add_host_bridge_dport);
|
||||
if (rc < 0)
|
||||
|
|
@ -989,8 +988,12 @@ static void __exit cxl_acpi_exit(void)
|
|||
cxl_bus_drain();
|
||||
}
|
||||
|
||||
/* load before dax_hmem sees 'Soft Reserved' CXL ranges */
|
||||
subsys_initcall(cxl_acpi_init);
|
||||
/*
|
||||
* Load before dax_hmem sees 'Soft Reserved' CXL ranges. Use
|
||||
* subsys_initcall_sync() since there is an order dependency with
|
||||
* subsys_initcall(efisubsys_init), which must run first.
|
||||
*/
|
||||
subsys_initcall_sync(cxl_acpi_init);
|
||||
|
||||
/*
|
||||
* Arrange for host-bridge ports to be active synchronous with
|
||||
|
|
|
|||
|
|
@ -21,3 +21,4 @@ cxl_core-$(CONFIG_CXL_FEATURES) += features.o
|
|||
cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
|
||||
cxl_core-$(CONFIG_CXL_RAS) += ras.o
|
||||
cxl_core-$(CONFIG_CXL_RAS) += ras_rch.o
|
||||
cxl_core-$(CONFIG_CXL_ATL) += atl.o
|
||||
|
|
|
|||
211
drivers/cxl/core/atl.c
Normal file
211
drivers/cxl/core/atl.c
Normal file
|
|
@ -0,0 +1,211 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2025 Advanced Micro Devices, Inc.
|
||||
*/
|
||||
|
||||
#include <linux/prmt.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/acpi.h>
|
||||
|
||||
#include <cxlmem.h>
|
||||
#include "core.h"
|
||||
|
||||
/*
|
||||
* PRM Address Translation - CXL DPA to System Physical Address
|
||||
*
|
||||
* Reference:
|
||||
*
|
||||
* AMD Family 1Ah Models 00h–0Fh and Models 10h–1Fh
|
||||
* ACPI v6.5 Porting Guide, Publication # 58088
|
||||
*/
|
||||
|
||||
static const guid_t prm_cxl_dpa_spa_guid =
|
||||
GUID_INIT(0xee41b397, 0x25d4, 0x452c, 0xad, 0x54, 0x48, 0xc6, 0xe3,
|
||||
0x48, 0x0b, 0x94);
|
||||
|
||||
struct prm_cxl_dpa_spa_data {
|
||||
u64 dpa;
|
||||
u8 reserved;
|
||||
u8 devfn;
|
||||
u8 bus;
|
||||
u8 segment;
|
||||
u64 *spa;
|
||||
} __packed;
|
||||
|
||||
static u64 prm_cxl_dpa_spa(struct pci_dev *pci_dev, u64 dpa)
|
||||
{
|
||||
struct prm_cxl_dpa_spa_data data;
|
||||
u64 spa;
|
||||
int rc;
|
||||
|
||||
data = (struct prm_cxl_dpa_spa_data) {
|
||||
.dpa = dpa,
|
||||
.devfn = pci_dev->devfn,
|
||||
.bus = pci_dev->bus->number,
|
||||
.segment = pci_domain_nr(pci_dev->bus),
|
||||
.spa = &spa,
|
||||
};
|
||||
|
||||
rc = acpi_call_prm_handler(prm_cxl_dpa_spa_guid, &data);
|
||||
if (rc) {
|
||||
pci_dbg(pci_dev, "failed to get SPA for %#llx: %d\n", dpa, rc);
|
||||
return ULLONG_MAX;
|
||||
}
|
||||
|
||||
pci_dbg(pci_dev, "PRM address translation: DPA -> SPA: %#llx -> %#llx\n", dpa, spa);
|
||||
|
||||
return spa;
|
||||
}
|
||||
|
||||
static int cxl_prm_setup_root(struct cxl_root *cxl_root, void *data)
|
||||
{
|
||||
struct cxl_region_context *ctx = data;
|
||||
struct cxl_endpoint_decoder *cxled = ctx->cxled;
|
||||
struct cxl_decoder *cxld = &cxled->cxld;
|
||||
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
|
||||
struct range hpa_range = ctx->hpa_range;
|
||||
struct pci_dev *pci_dev;
|
||||
u64 spa_len, len;
|
||||
u64 addr, base_spa, base;
|
||||
int ways, gran;
|
||||
|
||||
/*
|
||||
* When Normalized Addressing is enabled, the endpoint maintains a 1:1
|
||||
* mapping between HPA and DPA. If disabled, skip address translation
|
||||
* and perform only a range check.
|
||||
*/
|
||||
if (hpa_range.start != cxled->dpa_res->start)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Endpoints are programmed passthrough in Normalized Addressing mode.
|
||||
*/
|
||||
if (ctx->interleave_ways != 1) {
|
||||
dev_dbg(&cxld->dev, "unexpected interleaving config: ways: %d granularity: %d\n",
|
||||
ctx->interleave_ways, ctx->interleave_granularity);
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
if (!cxlmd || !dev_is_pci(cxlmd->dev.parent)) {
|
||||
dev_dbg(&cxld->dev, "No endpoint found: %s, range %#llx-%#llx\n",
|
||||
dev_name(cxld->dev.parent), hpa_range.start,
|
||||
hpa_range.end);
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
pci_dev = to_pci_dev(cxlmd->dev.parent);
|
||||
|
||||
/* Translate HPA range to SPA. */
|
||||
base = hpa_range.start;
|
||||
hpa_range.start = prm_cxl_dpa_spa(pci_dev, hpa_range.start);
|
||||
hpa_range.end = prm_cxl_dpa_spa(pci_dev, hpa_range.end);
|
||||
base_spa = hpa_range.start;
|
||||
|
||||
if (hpa_range.start == ULLONG_MAX || hpa_range.end == ULLONG_MAX) {
|
||||
dev_dbg(cxld->dev.parent,
|
||||
"CXL address translation: Failed to translate HPA range: %#llx-%#llx:%#llx-%#llx(%s)\n",
|
||||
hpa_range.start, hpa_range.end, ctx->hpa_range.start,
|
||||
ctx->hpa_range.end, dev_name(&cxld->dev));
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
/*
|
||||
* Since translated addresses include the interleaving offsets, align
|
||||
* the range to 256 MB.
|
||||
*/
|
||||
hpa_range.start = ALIGN_DOWN(hpa_range.start, SZ_256M);
|
||||
hpa_range.end = ALIGN(hpa_range.end, SZ_256M) - 1;
|
||||
|
||||
len = range_len(&ctx->hpa_range);
|
||||
spa_len = range_len(&hpa_range);
|
||||
if (!len || !spa_len || spa_len % len) {
|
||||
dev_dbg(cxld->dev.parent,
|
||||
"CXL address translation: HPA range not contiguous: %#llx-%#llx:%#llx-%#llx(%s)\n",
|
||||
hpa_range.start, hpa_range.end, ctx->hpa_range.start,
|
||||
ctx->hpa_range.end, dev_name(&cxld->dev));
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
ways = spa_len / len;
|
||||
gran = SZ_256;
|
||||
|
||||
/*
|
||||
* Determine interleave granularity
|
||||
*
|
||||
* Note: The position of the chunk from one interleaving block to the
|
||||
* next may vary and thus cannot be considered constant. Address offsets
|
||||
* larger than the interleaving block size cannot be used to calculate
|
||||
* the granularity.
|
||||
*/
|
||||
if (ways > 1) {
|
||||
while (gran <= SZ_16M) {
|
||||
addr = prm_cxl_dpa_spa(pci_dev, base + gran);
|
||||
if (addr != base_spa + gran)
|
||||
break;
|
||||
gran <<= 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (gran > SZ_16M) {
|
||||
dev_dbg(cxld->dev.parent,
|
||||
"CXL address translation: Cannot determine granularity: %#llx-%#llx:%#llx-%#llx(%s)\n",
|
||||
hpa_range.start, hpa_range.end, ctx->hpa_range.start,
|
||||
ctx->hpa_range.end, dev_name(&cxld->dev));
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
/*
|
||||
* The current kernel implementation does not support endpoint
|
||||
* setup with Normalized Addressing. It only translates an
|
||||
* endpoint's DPA to the SPA range of the host bridge.
|
||||
* Therefore, the endpoint address range cannot be determined,
|
||||
* making a non-auto setup impossible. If a decoder requires
|
||||
* address translation, reprogramming should be disabled and
|
||||
* the decoder locked.
|
||||
*
|
||||
* The BIOS, however, provides all the necessary address
|
||||
* translation data, which the kernel can use to reconfigure
|
||||
* endpoint decoders with normalized addresses. Locking the
|
||||
* decoders in the BIOS would prevent a capable kernel (or
|
||||
* other operating systems) from shutting down auto-generated
|
||||
* regions and managing resources dynamically.
|
||||
*
|
||||
* Indicate that Normalized Addressing is enabled.
|
||||
*/
|
||||
cxld->flags |= CXL_DECODER_F_LOCK;
|
||||
cxld->flags |= CXL_DECODER_F_NORMALIZED_ADDRESSING;
|
||||
|
||||
ctx->hpa_range = hpa_range;
|
||||
ctx->interleave_ways = ways;
|
||||
ctx->interleave_granularity = gran;
|
||||
|
||||
dev_dbg(&cxld->dev,
|
||||
"address mapping found for %s (hpa -> spa): %#llx+%#llx -> %#llx+%#llx ways:%d granularity:%d\n",
|
||||
dev_name(cxlmd->dev.parent), base, len, hpa_range.start,
|
||||
spa_len, ways, gran);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void cxl_setup_prm_address_translation(struct cxl_root *cxl_root)
|
||||
{
|
||||
struct device *host = cxl_root->port.uport_dev;
|
||||
u64 spa;
|
||||
struct prm_cxl_dpa_spa_data data = { .spa = &spa };
|
||||
int rc;
|
||||
|
||||
/*
|
||||
* Applies only to PCIe Host Bridges which are children of the CXL Root
|
||||
* Device (HID=“ACPI0017”). Check this and drop cxl_test instances.
|
||||
*/
|
||||
if (!acpi_match_device(host->driver->acpi_match_table, host))
|
||||
return;
|
||||
|
||||
/* Check kernel (-EOPNOTSUPP) and firmware support (-ENODEV) */
|
||||
rc = acpi_call_prm_handler(prm_cxl_dpa_spa_guid, &data);
|
||||
if (rc == -EOPNOTSUPP || rc == -ENODEV)
|
||||
return;
|
||||
|
||||
cxl_root->ops.translation_setup_root = cxl_prm_setup_root;
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(cxl_setup_prm_address_translation, "CXL");
|
||||
|
|
@ -213,7 +213,7 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
|
|||
if (!cxl_root)
|
||||
return -ENODEV;
|
||||
|
||||
if (!cxl_root->ops || !cxl_root->ops->qos_class)
|
||||
if (!cxl_root->ops.qos_class)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
xa_for_each(dsmas_xa, index, dent) {
|
||||
|
|
@ -221,9 +221,9 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
|
|||
|
||||
cxl_coordinates_combine(dent->coord, dent->cdat_coord, ep_c);
|
||||
dent->entries = 1;
|
||||
rc = cxl_root->ops->qos_class(cxl_root,
|
||||
&dent->coord[ACCESS_COORDINATE_CPU],
|
||||
1, &qos_class);
|
||||
rc = cxl_root->ops.qos_class(cxl_root,
|
||||
&dent->coord[ACCESS_COORDINATE_CPU],
|
||||
1, &qos_class);
|
||||
if (rc != 1)
|
||||
continue;
|
||||
|
||||
|
|
|
|||
|
|
@ -19,6 +19,14 @@ enum cxl_detach_mode {
|
|||
};
|
||||
|
||||
#ifdef CONFIG_CXL_REGION
|
||||
|
||||
struct cxl_region_context {
|
||||
struct cxl_endpoint_decoder *cxled;
|
||||
struct range hpa_range;
|
||||
int interleave_ways;
|
||||
int interleave_granularity;
|
||||
};
|
||||
|
||||
extern struct device_attribute dev_attr_create_pmem_region;
|
||||
extern struct device_attribute dev_attr_create_ram_region;
|
||||
extern struct device_attribute dev_attr_delete_region;
|
||||
|
|
|
|||
|
|
@ -957,19 +957,15 @@ struct cxl_port *devm_cxl_add_port(struct device *host,
|
|||
}
|
||||
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_port, "CXL");
|
||||
|
||||
struct cxl_root *devm_cxl_add_root(struct device *host,
|
||||
const struct cxl_root_ops *ops)
|
||||
struct cxl_root *devm_cxl_add_root(struct device *host)
|
||||
{
|
||||
struct cxl_root *cxl_root;
|
||||
struct cxl_port *port;
|
||||
|
||||
port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL);
|
||||
if (IS_ERR(port))
|
||||
return ERR_CAST(port);
|
||||
|
||||
cxl_root = to_cxl_root(port);
|
||||
cxl_root->ops = ops;
|
||||
return cxl_root;
|
||||
return to_cxl_root(port);
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_root, "CXL");
|
||||
|
||||
|
|
|
|||
|
|
@ -489,9 +489,9 @@ static ssize_t interleave_ways_store(struct device *dev,
|
|||
struct device_attribute *attr,
|
||||
const char *buf, size_t len)
|
||||
{
|
||||
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
|
||||
struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
|
||||
struct cxl_region *cxlr = to_cxl_region(dev);
|
||||
struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
|
||||
struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
|
||||
struct cxl_region_params *p = &cxlr->params;
|
||||
unsigned int val, save;
|
||||
int rc;
|
||||
|
|
@ -552,9 +552,9 @@ static ssize_t interleave_granularity_store(struct device *dev,
|
|||
struct device_attribute *attr,
|
||||
const char *buf, size_t len)
|
||||
{
|
||||
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
|
||||
struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
|
||||
struct cxl_region *cxlr = to_cxl_region(dev);
|
||||
struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
|
||||
struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
|
||||
struct cxl_region_params *p = &cxlr->params;
|
||||
int rc, val;
|
||||
u16 ig;
|
||||
|
|
@ -628,7 +628,7 @@ static DEVICE_ATTR_RO(mode);
|
|||
|
||||
static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
|
||||
{
|
||||
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
|
||||
struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
|
||||
struct cxl_region_params *p = &cxlr->params;
|
||||
struct resource *res;
|
||||
u64 remainder = 0;
|
||||
|
|
@ -664,6 +664,8 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
|
|||
return PTR_ERR(res);
|
||||
}
|
||||
|
||||
cxlr->hpa_range = DEFINE_RANGE(res->start, res->end);
|
||||
|
||||
p->res = res;
|
||||
p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
|
||||
|
||||
|
|
@ -700,6 +702,8 @@ static int free_hpa(struct cxl_region *cxlr)
|
|||
if (p->state >= CXL_CONFIG_ACTIVE)
|
||||
return -EBUSY;
|
||||
|
||||
cxlr->hpa_range = DEFINE_RANGE(0, -1);
|
||||
|
||||
cxl_region_iomem_release(cxlr);
|
||||
p->state = CXL_CONFIG_IDLE;
|
||||
return 0;
|
||||
|
|
@ -1093,14 +1097,16 @@ static int cxl_rr_assign_decoder(struct cxl_port *port, struct cxl_region *cxlr,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void cxl_region_set_lock(struct cxl_region *cxlr,
|
||||
struct cxl_decoder *cxld)
|
||||
static void cxl_region_setup_flags(struct cxl_region *cxlr,
|
||||
struct cxl_decoder *cxld)
|
||||
{
|
||||
if (!test_bit(CXL_DECODER_F_LOCK, &cxld->flags))
|
||||
return;
|
||||
if (test_bit(CXL_DECODER_F_LOCK, &cxld->flags)) {
|
||||
set_bit(CXL_REGION_F_LOCK, &cxlr->flags);
|
||||
clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
|
||||
}
|
||||
|
||||
set_bit(CXL_REGION_F_LOCK, &cxlr->flags);
|
||||
clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
|
||||
if (test_bit(CXL_DECODER_F_NORMALIZED_ADDRESSING, &cxld->flags))
|
||||
set_bit(CXL_REGION_F_NORMALIZED_ADDRESSING, &cxlr->flags);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1214,7 +1220,7 @@ static int cxl_port_attach_region(struct cxl_port *port,
|
|||
}
|
||||
}
|
||||
|
||||
cxl_region_set_lock(cxlr, cxld);
|
||||
cxl_region_setup_flags(cxlr, cxld);
|
||||
|
||||
rc = cxl_rr_ep_add(cxl_rr, cxled);
|
||||
if (rc) {
|
||||
|
|
@ -1373,7 +1379,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
|
|||
struct cxl_region *cxlr,
|
||||
struct cxl_endpoint_decoder *cxled)
|
||||
{
|
||||
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
|
||||
struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
|
||||
int parent_iw, parent_ig, ig, iw, rc, pos = cxled->pos;
|
||||
struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
|
||||
struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
|
||||
|
|
@ -1731,10 +1737,10 @@ static int cxl_region_validate_position(struct cxl_region *cxlr,
|
|||
}
|
||||
|
||||
static int cxl_region_attach_position(struct cxl_region *cxlr,
|
||||
struct cxl_root_decoder *cxlrd,
|
||||
struct cxl_endpoint_decoder *cxled,
|
||||
const struct cxl_dport *dport, int pos)
|
||||
{
|
||||
struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
|
||||
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
|
||||
struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
|
||||
struct cxl_decoder *cxld = &cxlsd->cxld;
|
||||
|
|
@ -1874,6 +1880,7 @@ static int find_pos_and_ways(struct cxl_port *port, struct range *range,
|
|||
/**
|
||||
* cxl_calc_interleave_pos() - calculate an endpoint position in a region
|
||||
* @cxled: endpoint decoder member of given region
|
||||
* @hpa_range: translated HPA range of the endpoint
|
||||
*
|
||||
* The endpoint position is calculated by traversing the topology from
|
||||
* the endpoint to the root decoder and iteratively applying this
|
||||
|
|
@ -1886,11 +1893,11 @@ static int find_pos_and_ways(struct cxl_port *port, struct range *range,
|
|||
* Return: position >= 0 on success
|
||||
* -ENXIO on failure
|
||||
*/
|
||||
static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled)
|
||||
static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled,
|
||||
struct range *hpa_range)
|
||||
{
|
||||
struct cxl_port *iter, *port = cxled_to_port(cxled);
|
||||
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
|
||||
struct range *range = &cxled->cxld.hpa_range;
|
||||
int parent_ways = 0, parent_pos = 0, pos = 0;
|
||||
int rc;
|
||||
|
||||
|
|
@ -1928,7 +1935,8 @@ static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled)
|
|||
if (is_cxl_root(iter))
|
||||
break;
|
||||
|
||||
rc = find_pos_and_ways(iter, range, &parent_pos, &parent_ways);
|
||||
rc = find_pos_and_ways(iter, hpa_range, &parent_pos,
|
||||
&parent_ways);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
|
|
@ -1938,7 +1946,7 @@ static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled)
|
|||
dev_dbg(&cxlmd->dev,
|
||||
"decoder:%s parent:%s port:%s range:%#llx-%#llx pos:%d\n",
|
||||
dev_name(&cxled->cxld.dev), dev_name(cxlmd->dev.parent),
|
||||
dev_name(&port->dev), range->start, range->end, pos);
|
||||
dev_name(&port->dev), hpa_range->start, hpa_range->end, pos);
|
||||
|
||||
return pos;
|
||||
}
|
||||
|
|
@ -1951,7 +1959,7 @@ static int cxl_region_sort_targets(struct cxl_region *cxlr)
|
|||
for (i = 0; i < p->nr_targets; i++) {
|
||||
struct cxl_endpoint_decoder *cxled = p->targets[i];
|
||||
|
||||
cxled->pos = cxl_calc_interleave_pos(cxled);
|
||||
cxled->pos = cxl_calc_interleave_pos(cxled, &cxlr->hpa_range);
|
||||
/*
|
||||
* Record that sorting failed, but still continue to calc
|
||||
* cxled->pos so that follow-on code paths can reliably
|
||||
|
|
@ -1971,7 +1979,7 @@ static int cxl_region_sort_targets(struct cxl_region *cxlr)
|
|||
static int cxl_region_attach(struct cxl_region *cxlr,
|
||||
struct cxl_endpoint_decoder *cxled, int pos)
|
||||
{
|
||||
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
|
||||
struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
|
||||
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
|
||||
struct cxl_dev_state *cxlds = cxlmd->cxlds;
|
||||
struct cxl_region_params *p = &cxlr->params;
|
||||
|
|
@ -2076,8 +2084,7 @@ static int cxl_region_attach(struct cxl_region *cxlr,
|
|||
ep_port = cxled_to_port(cxled);
|
||||
dport = cxl_find_dport_by_dev(root_port,
|
||||
ep_port->host_bridge);
|
||||
rc = cxl_region_attach_position(cxlr, cxlrd, cxled,
|
||||
dport, i);
|
||||
rc = cxl_region_attach_position(cxlr, cxled, dport, i);
|
||||
if (rc)
|
||||
return rc;
|
||||
}
|
||||
|
|
@ -2100,7 +2107,7 @@ static int cxl_region_attach(struct cxl_region *cxlr,
|
|||
if (rc)
|
||||
return rc;
|
||||
|
||||
rc = cxl_region_attach_position(cxlr, cxlrd, cxled, dport, pos);
|
||||
rc = cxl_region_attach_position(cxlr, cxled, dport, pos);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
|
|
@ -2136,7 +2143,7 @@ static int cxl_region_attach(struct cxl_region *cxlr,
|
|||
struct cxl_endpoint_decoder *cxled = p->targets[i];
|
||||
int test_pos;
|
||||
|
||||
test_pos = cxl_calc_interleave_pos(cxled);
|
||||
test_pos = cxl_calc_interleave_pos(cxled, &cxlr->hpa_range);
|
||||
dev_dbg(&cxled->cxld.dev,
|
||||
"Test cxl_calc_interleave_pos(): %s test_pos:%d cxled->pos:%d\n",
|
||||
(test_pos == cxled->pos) ? "success" : "fail",
|
||||
|
|
@ -2396,8 +2403,8 @@ static const struct attribute_group *region_groups[] = {
|
|||
|
||||
static void cxl_region_release(struct device *dev)
|
||||
{
|
||||
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
|
||||
struct cxl_region *cxlr = to_cxl_region(dev);
|
||||
struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
|
||||
int id = atomic_read(&cxlrd->region_id);
|
||||
|
||||
/*
|
||||
|
|
@ -2454,6 +2461,8 @@ static void unregister_region(void *_cxlr)
|
|||
for (i = 0; i < p->interleave_ways; i++)
|
||||
detach_target(cxlr, i);
|
||||
|
||||
cxlr->hpa_range = DEFINE_RANGE(0, -1);
|
||||
|
||||
cxl_region_iomem_release(cxlr);
|
||||
put_device(&cxlr->dev);
|
||||
}
|
||||
|
|
@ -2480,11 +2489,13 @@ static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int i
|
|||
* region id allocations
|
||||
*/
|
||||
get_device(dev->parent);
|
||||
cxlr->cxlrd = cxlrd;
|
||||
cxlr->id = id;
|
||||
|
||||
device_set_pm_not_required(dev);
|
||||
dev->bus = &cxl_bus_type;
|
||||
dev->type = &cxl_region_type;
|
||||
cxlr->id = id;
|
||||
cxl_region_set_lock(cxlr, &cxlrd->cxlsd.cxld);
|
||||
cxl_region_setup_flags(cxlr, &cxlrd->cxlsd.cxld);
|
||||
|
||||
return cxlr;
|
||||
}
|
||||
|
|
@ -3246,7 +3257,7 @@ static bool region_is_unaligned_mod3(struct cxl_region *cxlr)
|
|||
u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
|
||||
u64 dpa)
|
||||
{
|
||||
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
|
||||
struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
|
||||
struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
|
||||
struct cxl_region_params *p = &cxlr->params;
|
||||
struct cxl_endpoint_decoder *cxled = NULL;
|
||||
|
|
@ -3256,6 +3267,13 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
|
|||
u8 eiw = 0;
|
||||
int pos;
|
||||
|
||||
/*
|
||||
* Conversion between SPA and DPA is not supported in
|
||||
* Normalized Address mode.
|
||||
*/
|
||||
if (test_bit(CXL_REGION_F_NORMALIZED_ADDRESSING, &cxlr->flags))
|
||||
return ULLONG_MAX;
|
||||
|
||||
for (int i = 0; i < p->nr_targets; i++) {
|
||||
if (cxlmd == cxled_to_memdev(p->targets[i])) {
|
||||
cxled = p->targets[i];
|
||||
|
|
@ -3361,7 +3379,7 @@ static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset,
|
|||
struct dpa_result *result)
|
||||
{
|
||||
struct cxl_region_params *p = &cxlr->params;
|
||||
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
|
||||
struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
|
||||
struct cxl_endpoint_decoder *cxled;
|
||||
u64 hpa, hpa_offset, dpa_offset;
|
||||
u16 eig = 0;
|
||||
|
|
@ -3657,47 +3675,68 @@ err:
|
|||
return rc;
|
||||
}
|
||||
|
||||
static int match_decoder_by_range(struct device *dev, const void *data)
|
||||
static int match_root_decoder(struct device *dev, const void *data)
|
||||
{
|
||||
const struct range *r1, *r2 = data;
|
||||
struct cxl_decoder *cxld;
|
||||
struct cxl_root_decoder *cxlrd;
|
||||
|
||||
if (!is_switch_decoder(dev))
|
||||
if (!is_root_decoder(dev))
|
||||
return 0;
|
||||
|
||||
cxld = to_cxl_decoder(dev);
|
||||
r1 = &cxld->hpa_range;
|
||||
cxlrd = to_cxl_root_decoder(dev);
|
||||
r1 = &cxlrd->cxlsd.cxld.hpa_range;
|
||||
|
||||
return range_contains(r1, r2);
|
||||
}
|
||||
|
||||
static struct cxl_decoder *
|
||||
cxl_port_find_switch_decoder(struct cxl_port *port, struct range *hpa)
|
||||
static int cxl_root_setup_translation(struct cxl_root *cxl_root,
|
||||
struct cxl_region_context *ctx)
|
||||
{
|
||||
struct device *cxld_dev = device_find_child(&port->dev, hpa,
|
||||
match_decoder_by_range);
|
||||
if (!cxl_root->ops.translation_setup_root)
|
||||
return 0;
|
||||
|
||||
return cxld_dev ? to_cxl_decoder(cxld_dev) : NULL;
|
||||
return cxl_root->ops.translation_setup_root(cxl_root, ctx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Note, when finished with the device, drop the reference with
|
||||
* put_device() or use the put_cxl_root_decoder helper.
|
||||
*/
|
||||
static struct cxl_root_decoder *
|
||||
cxl_find_root_decoder(struct cxl_endpoint_decoder *cxled)
|
||||
get_cxl_root_decoder(struct cxl_endpoint_decoder *cxled,
|
||||
struct cxl_region_context *ctx)
|
||||
{
|
||||
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
|
||||
struct cxl_port *port = cxled_to_port(cxled);
|
||||
struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);
|
||||
struct cxl_decoder *root, *cxld = &cxled->cxld;
|
||||
struct range *hpa = &cxld->hpa_range;
|
||||
struct device *cxlrd_dev;
|
||||
int rc;
|
||||
|
||||
root = cxl_port_find_switch_decoder(&cxl_root->port, hpa);
|
||||
if (!root) {
|
||||
/*
|
||||
* Adjust the endpoint's HPA range and interleaving
|
||||
* configuration to the root decoder’s memory space before
|
||||
* setting up the root decoder.
|
||||
*/
|
||||
rc = cxl_root_setup_translation(cxl_root, ctx);
|
||||
if (rc) {
|
||||
dev_err(cxlmd->dev.parent,
|
||||
"%s:%s no CXL window for range %#llx:%#llx\n",
|
||||
dev_name(&cxlmd->dev), dev_name(&cxld->dev),
|
||||
cxld->hpa_range.start, cxld->hpa_range.end);
|
||||
return NULL;
|
||||
"%s:%s Failed to setup translation for address range %#llx:%#llx\n",
|
||||
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
|
||||
ctx->hpa_range.start, ctx->hpa_range.end);
|
||||
return ERR_PTR(rc);
|
||||
}
|
||||
|
||||
return to_cxl_root_decoder(&root->dev);
|
||||
cxlrd_dev = device_find_child(&cxl_root->port.dev, &ctx->hpa_range,
|
||||
match_root_decoder);
|
||||
if (!cxlrd_dev) {
|
||||
dev_err(cxlmd->dev.parent,
|
||||
"%s:%s no CXL window for range %#llx:%#llx\n",
|
||||
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
|
||||
ctx->hpa_range.start, ctx->hpa_range.end);
|
||||
return ERR_PTR(-ENXIO);
|
||||
}
|
||||
|
||||
return to_cxl_root_decoder(cxlrd_dev);
|
||||
}
|
||||
|
||||
static int match_region_by_range(struct device *dev, const void *data)
|
||||
|
|
@ -3719,7 +3758,7 @@ static int match_region_by_range(struct device *dev, const void *data)
|
|||
static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr,
|
||||
struct resource *res)
|
||||
{
|
||||
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
|
||||
struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
|
||||
struct cxl_region_params *p = &cxlr->params;
|
||||
resource_size_t size = resource_size(res);
|
||||
resource_size_t cache_size, start;
|
||||
|
|
@ -3755,11 +3794,12 @@ static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr,
|
|||
}
|
||||
|
||||
static int __construct_region(struct cxl_region *cxlr,
|
||||
struct cxl_root_decoder *cxlrd,
|
||||
struct cxl_endpoint_decoder *cxled)
|
||||
struct cxl_region_context *ctx)
|
||||
{
|
||||
struct cxl_endpoint_decoder *cxled = ctx->cxled;
|
||||
struct cxl_root_decoder *cxlrd = cxlr->cxlrd;
|
||||
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
|
||||
struct range *hpa = &cxled->cxld.hpa_range;
|
||||
struct range *hpa_range = &ctx->hpa_range;
|
||||
struct cxl_region_params *p;
|
||||
struct resource *res;
|
||||
int rc;
|
||||
|
|
@ -3775,12 +3815,13 @@ static int __construct_region(struct cxl_region *cxlr,
|
|||
}
|
||||
|
||||
set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
|
||||
cxlr->hpa_range = *hpa_range;
|
||||
|
||||
res = kmalloc(sizeof(*res), GFP_KERNEL);
|
||||
if (!res)
|
||||
return -ENOMEM;
|
||||
|
||||
*res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
|
||||
*res = DEFINE_RES_MEM_NAMED(hpa_range->start, range_len(hpa_range),
|
||||
dev_name(&cxlr->dev));
|
||||
|
||||
rc = cxl_extended_linear_cache_resize(cxlr, res);
|
||||
|
|
@ -3811,8 +3852,8 @@ static int __construct_region(struct cxl_region *cxlr,
|
|||
}
|
||||
|
||||
p->res = res;
|
||||
p->interleave_ways = cxled->cxld.interleave_ways;
|
||||
p->interleave_granularity = cxled->cxld.interleave_granularity;
|
||||
p->interleave_ways = ctx->interleave_ways;
|
||||
p->interleave_granularity = ctx->interleave_granularity;
|
||||
p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
|
||||
|
||||
rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
|
||||
|
|
@ -3832,8 +3873,9 @@ static int __construct_region(struct cxl_region *cxlr,
|
|||
|
||||
/* Establish an empty region covering the given HPA range */
|
||||
static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
|
||||
struct cxl_endpoint_decoder *cxled)
|
||||
struct cxl_region_context *ctx)
|
||||
{
|
||||
struct cxl_endpoint_decoder *cxled = ctx->cxled;
|
||||
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
|
||||
struct cxl_port *port = cxlrd_to_port(cxlrd);
|
||||
struct cxl_dev_state *cxlds = cxlmd->cxlds;
|
||||
|
|
@ -3853,7 +3895,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
|
|||
return cxlr;
|
||||
}
|
||||
|
||||
rc = __construct_region(cxlr, cxlrd, cxled);
|
||||
rc = __construct_region(cxlr, ctx);
|
||||
if (rc) {
|
||||
devm_release_action(port->uport_dev, unregister_region, cxlr);
|
||||
return ERR_PTR(rc);
|
||||
|
|
@ -3863,11 +3905,12 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
|
|||
}
|
||||
|
||||
static struct cxl_region *
|
||||
cxl_find_region_by_range(struct cxl_root_decoder *cxlrd, struct range *hpa)
|
||||
cxl_find_region_by_range(struct cxl_root_decoder *cxlrd,
|
||||
struct range *hpa_range)
|
||||
{
|
||||
struct device *region_dev;
|
||||
|
||||
region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa,
|
||||
region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa_range,
|
||||
match_region_by_range);
|
||||
if (!region_dev)
|
||||
return NULL;
|
||||
|
|
@ -3877,25 +3920,34 @@ cxl_find_region_by_range(struct cxl_root_decoder *cxlrd, struct range *hpa)
|
|||
|
||||
int cxl_add_to_region(struct cxl_endpoint_decoder *cxled)
|
||||
{
|
||||
struct range *hpa = &cxled->cxld.hpa_range;
|
||||
struct cxl_region_context ctx;
|
||||
struct cxl_region_params *p;
|
||||
bool attach = false;
|
||||
int rc;
|
||||
|
||||
ctx = (struct cxl_region_context) {
|
||||
.cxled = cxled,
|
||||
.hpa_range = cxled->cxld.hpa_range,
|
||||
.interleave_ways = cxled->cxld.interleave_ways,
|
||||
.interleave_granularity = cxled->cxld.interleave_granularity,
|
||||
};
|
||||
|
||||
struct cxl_root_decoder *cxlrd __free(put_cxl_root_decoder) =
|
||||
cxl_find_root_decoder(cxled);
|
||||
if (!cxlrd)
|
||||
return -ENXIO;
|
||||
get_cxl_root_decoder(cxled, &ctx);
|
||||
|
||||
if (IS_ERR(cxlrd))
|
||||
return PTR_ERR(cxlrd);
|
||||
|
||||
/*
|
||||
* Ensure that if multiple threads race to construct_region() for @hpa
|
||||
* one does the construction and the others add to that.
|
||||
* Ensure that, if multiple threads race to construct_region()
|
||||
* for the HPA range, one does the construction and the others
|
||||
* add to that.
|
||||
*/
|
||||
mutex_lock(&cxlrd->range_lock);
|
||||
struct cxl_region *cxlr __free(put_cxl_region) =
|
||||
cxl_find_region_by_range(cxlrd, hpa);
|
||||
cxl_find_region_by_range(cxlrd, &ctx.hpa_range);
|
||||
if (!cxlr)
|
||||
cxlr = construct_region(cxlrd, cxled);
|
||||
cxlr = construct_region(cxlrd, &ctx);
|
||||
mutex_unlock(&cxlrd->range_lock);
|
||||
|
||||
rc = PTR_ERR_OR_ZERO(cxlr);
|
||||
|
|
@ -4070,6 +4122,39 @@ static int cxl_region_debugfs_poison_clear(void *data, u64 offset)
|
|||
DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL,
|
||||
cxl_region_debugfs_poison_clear, "%llx\n");
|
||||
|
||||
static int cxl_region_setup_poison(struct cxl_region *cxlr)
|
||||
{
|
||||
struct device *dev = &cxlr->dev;
|
||||
struct cxl_region_params *p = &cxlr->params;
|
||||
struct dentry *dentry;
|
||||
|
||||
/*
|
||||
* Do not enable poison injection in Normalized Address mode.
|
||||
* Conversion between SPA and DPA is required for this, but it is
|
||||
* not supported in this mode.
|
||||
*/
|
||||
if (test_bit(CXL_REGION_F_NORMALIZED_ADDRESSING, &cxlr->flags))
|
||||
return 0;
|
||||
|
||||
/* Create poison attributes if all memdevs support the capabilities */
|
||||
for (int i = 0; i < p->nr_targets; i++) {
|
||||
struct cxl_endpoint_decoder *cxled = p->targets[i];
|
||||
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
|
||||
|
||||
if (!cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_INJECT) ||
|
||||
!cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_CLEAR))
|
||||
return 0;
|
||||
}
|
||||
|
||||
dentry = cxl_debugfs_create_dir(dev_name(dev));
|
||||
debugfs_create_file("inject_poison", 0200, dentry, cxlr,
|
||||
&cxl_poison_inject_fops);
|
||||
debugfs_create_file("clear_poison", 0200, dentry, cxlr,
|
||||
&cxl_poison_clear_fops);
|
||||
|
||||
return devm_add_action_or_reset(dev, remove_debugfs, dentry);
|
||||
}
|
||||
|
||||
static int cxl_region_can_probe(struct cxl_region *cxlr)
|
||||
{
|
||||
struct cxl_region_params *p = &cxlr->params;
|
||||
|
|
@ -4099,7 +4184,6 @@ static int cxl_region_probe(struct device *dev)
|
|||
{
|
||||
struct cxl_region *cxlr = to_cxl_region(dev);
|
||||
struct cxl_region_params *p = &cxlr->params;
|
||||
bool poison_supported = true;
|
||||
int rc;
|
||||
|
||||
rc = cxl_region_can_probe(cxlr);
|
||||
|
|
@ -4123,30 +4207,9 @@ static int cxl_region_probe(struct device *dev)
|
|||
if (rc)
|
||||
return rc;
|
||||
|
||||
/* Create poison attributes if all memdevs support the capabilities */
|
||||
for (int i = 0; i < p->nr_targets; i++) {
|
||||
struct cxl_endpoint_decoder *cxled = p->targets[i];
|
||||
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
|
||||
|
||||
if (!cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_INJECT) ||
|
||||
!cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_CLEAR)) {
|
||||
poison_supported = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (poison_supported) {
|
||||
struct dentry *dentry;
|
||||
|
||||
dentry = cxl_debugfs_create_dir(dev_name(dev));
|
||||
debugfs_create_file("inject_poison", 0200, dentry, cxlr,
|
||||
&cxl_poison_inject_fops);
|
||||
debugfs_create_file("clear_poison", 0200, dentry, cxlr,
|
||||
&cxl_poison_clear_fops);
|
||||
rc = devm_add_action_or_reset(dev, remove_debugfs, dentry);
|
||||
if (rc)
|
||||
return rc;
|
||||
}
|
||||
rc = cxl_region_setup_poison(cxlr);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
switch (cxlr->mode) {
|
||||
case CXL_PARTMODE_PMEM:
|
||||
|
|
|
|||
|
|
@ -332,7 +332,7 @@ int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev, struct cxl_dport *dport);
|
|||
#define CXL_DECODER_F_TYPE3 BIT(3)
|
||||
#define CXL_DECODER_F_LOCK BIT(4)
|
||||
#define CXL_DECODER_F_ENABLE BIT(5)
|
||||
#define CXL_DECODER_F_MASK GENMASK(5, 0)
|
||||
#define CXL_DECODER_F_NORMALIZED_ADDRESSING BIT(6)
|
||||
|
||||
enum cxl_decoder_type {
|
||||
CXL_DECODER_DEVMEM = 2,
|
||||
|
|
@ -525,10 +525,19 @@ enum cxl_partition_mode {
|
|||
*/
|
||||
#define CXL_REGION_F_LOCK 2
|
||||
|
||||
/*
|
||||
* Indicate Normalized Addressing. Use it to disable SPA conversion if
|
||||
* HPA != SPA and an address translation callback handler does not
|
||||
* exist. Flag is needed by AMD Zen5 platforms.
|
||||
*/
|
||||
#define CXL_REGION_F_NORMALIZED_ADDRESSING 3
|
||||
|
||||
/**
|
||||
* struct cxl_region - CXL region
|
||||
* @dev: This region's device
|
||||
* @id: This region's id. Id is globally unique across all regions
|
||||
* @cxlrd: Region's root decoder
|
||||
* @hpa_range: Address range occupied by the region
|
||||
* @mode: Operational mode of the mapped capacity
|
||||
* @type: Endpoint decoder target type
|
||||
* @cxl_nvb: nvdimm bridge for coordinating @cxlr_pmem setup / shutdown
|
||||
|
|
@ -542,6 +551,8 @@ enum cxl_partition_mode {
|
|||
struct cxl_region {
|
||||
struct device dev;
|
||||
int id;
|
||||
struct cxl_root_decoder *cxlrd;
|
||||
struct range hpa_range;
|
||||
enum cxl_partition_mode mode;
|
||||
enum cxl_decoder_type type;
|
||||
struct cxl_nvdimm_bridge *cxl_nvb;
|
||||
|
|
@ -644,6 +655,15 @@ struct cxl_port {
|
|||
resource_size_t component_reg_phys;
|
||||
};
|
||||
|
||||
struct cxl_root;
|
||||
|
||||
struct cxl_root_ops {
|
||||
int (*qos_class)(struct cxl_root *cxl_root,
|
||||
struct access_coordinate *coord, int entries,
|
||||
int *qos_class);
|
||||
int (*translation_setup_root)(struct cxl_root *cxl_root, void *data);
|
||||
};
|
||||
|
||||
/**
|
||||
* struct cxl_root - logical collection of root cxl_port items
|
||||
*
|
||||
|
|
@ -652,7 +672,7 @@ struct cxl_port {
|
|||
*/
|
||||
struct cxl_root {
|
||||
struct cxl_port port;
|
||||
const struct cxl_root_ops *ops;
|
||||
struct cxl_root_ops ops;
|
||||
};
|
||||
|
||||
static inline struct cxl_root *
|
||||
|
|
@ -661,12 +681,6 @@ to_cxl_root(const struct cxl_port *port)
|
|||
return container_of(port, struct cxl_root, port);
|
||||
}
|
||||
|
||||
struct cxl_root_ops {
|
||||
int (*qos_class)(struct cxl_root *cxl_root,
|
||||
struct access_coordinate *coord, int entries,
|
||||
int *qos_class);
|
||||
};
|
||||
|
||||
static inline struct cxl_dport *
|
||||
cxl_find_dport_by_dev(struct cxl_port *port, const struct device *dport_dev)
|
||||
{
|
||||
|
|
@ -780,8 +794,7 @@ struct cxl_port *devm_cxl_add_port(struct device *host,
|
|||
struct device *uport_dev,
|
||||
resource_size_t component_reg_phys,
|
||||
struct cxl_dport *parent_dport);
|
||||
struct cxl_root *devm_cxl_add_root(struct device *host,
|
||||
const struct cxl_root_ops *ops);
|
||||
struct cxl_root *devm_cxl_add_root(struct device *host);
|
||||
int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd,
|
||||
struct cxl_dport *parent_dport);
|
||||
struct cxl_root *find_cxl_root(struct cxl_port *port);
|
||||
|
|
@ -807,6 +820,13 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
|
|||
struct device *dport_dev, int port_id,
|
||||
resource_size_t rcrb);
|
||||
|
||||
#ifdef CONFIG_CXL_ATL
|
||||
void cxl_setup_prm_address_translation(struct cxl_root *cxl_root);
|
||||
#else
|
||||
static inline
|
||||
void cxl_setup_prm_address_translation(struct cxl_root *cxl_root) {}
|
||||
#endif
|
||||
|
||||
struct cxl_decoder *to_cxl_decoder(struct device *dev);
|
||||
struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev);
|
||||
struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
|
||||
|
|
|
|||
|
|
@ -65,6 +65,7 @@ cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o
|
|||
cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o
|
||||
cxl_core-$(CONFIG_CXL_RAS) += $(CXL_CORE_SRC)/ras.o
|
||||
cxl_core-$(CONFIG_CXL_RAS) += $(CXL_CORE_SRC)/ras_rch.o
|
||||
cxl_core-$(CONFIG_CXL_ATL) += $(CXL_CORE_SRC)/atl.o
|
||||
cxl_core-y += config_check.o
|
||||
cxl_core-y += cxl_core_test.o
|
||||
cxl_core-y += cxl_core_exports.o
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue