block-6.15-20250515

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmgmVlYQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgpg3FD/9KaCQn3VOI6NBMSgNP2FRjGhHbyza6kjN8
 Q1g0wVTEESt5jBDoNuahA1aTevhBT9qDNG3d7PPwg0OEsE/WW0/RBMe16W9VlDz5
 ph3S+nNLaBXQ6aZgH/qyH8eM+CEdoV2FhCUaikuQ25mft6BeonYGTEHVmdnfAg0i
 er7XnBl/lYNSzdzy1bEbGb0N7Lheasa386Z9oMevCtqgObG/XIFvc2otxxx1QmJ9
 iVcJJSnHf1Y9oqYGjebmqRoYnp9d3KEeAA1lMamlshVyX3DcZcXqMe0xEBZflGoG
 +nLUQ9Qjk38Azp+FOqQh/D4Bs/jXzThSsizzTwgBmHA5Pu5dCC0ZyPSrSX8Aahd6
 Yf4awgwFBH7vAvIbf8GMDloNxa6IAuZRzDig/fgqF2thSJJBghA7HNl92oz3BvoY
 KsnpdxPa8EtrTc0n5WNDj9/0m1msDfzlgLUBgSTM4N0fPlEkDYzLnBSi7d5jq8K4
 3lLqmZ/YhBeyzx37pTHLzE3rax/gZ6yLxyxliHrN/F9wSZRbc/PpMu1TdaSwElyk
 F5VyLMOMHA4PPpqkNdn1TgPd2GQ/uqDkNA0oO7wstBQ5rBuHN4RmItpQBq3lov+Z
 JyntB5th//2IFEVWAso6Ct67hqUAC8+JNmkTMpRzLaKgX4qr2ZBeGJIx445dhh+z
 C99L/zA3XA==
 =s90l
 -----END PGP SIGNATURE-----

Merge tag 'block-6.15-20250515' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - NVMe pull request via Christoph:
      - fixes for atomic writes (Alan Adamson)
      - fixes for polled CQs in nvmet-epf (Damien Le Moal)
      - fix for polled CQs in nvme-pci (Keith Busch)
      - fix compile on odd configs that need to be forced to inline
        (Kees Cook)
      - one more quirk (Ilya Guterman)

 - Fix for missing allocation of an integrity buffer for some cases

 - Fix for a regression with ublk command cancelation

* tag 'block-6.15-20250515' of git://git.kernel.dk/linux:
  ublk: fix dead loop when canceling io command
  nvme-pci: add NVME_QUIRK_NO_DEEPEST_PS quirk for SOLIDIGM P44 Pro
  nvme: all namespaces in a subsystem must adhere to a common atomic write size
  nvme: multipath: enable BLK_FEAT_ATOMIC_WRITES for multipathing
  nvmet: pci-epf: remove NVMET_PCI_EPF_Q_IS_SQ
  nvmet: pci-epf: improve debug message
  nvmet: pci-epf: cleanup nvmet_pci_epf_raise_irq()
  nvmet: pci-epf: do not fall back to using INTX if not supported
  nvmet: pci-epf: clear completion queue IRQ flag on delete
  nvme-pci: acquire cq_poll_lock in nvme_poll_irqdisable
  nvme-pci: make nvme_pci_npages_prp() __always_inline
  block: always allocate integrity buffer when required
commit 6462c247b2
Author: Linus Torvalds
Date:   2025-05-16 10:21:25 -07:00

7 changed files with 107 additions and 38 deletions

block/bio-integrity-auto.c

@@ -9,6 +9,7 @@
  * not aware of PI.
  */
 #include <linux/blk-integrity.h>
+#include <linux/t10-pi.h>
 #include <linux/workqueue.h>
 #include "blk.h"
@@ -43,6 +44,29 @@ static void bio_integrity_verify_fn(struct work_struct *work)
 	bio_endio(bio);
 }
 
+#define BIP_CHECK_FLAGS (BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG)
+static bool bip_should_check(struct bio_integrity_payload *bip)
+{
+	return bip->bip_flags & BIP_CHECK_FLAGS;
+}
+
+static bool bi_offload_capable(struct blk_integrity *bi)
+{
+	switch (bi->csum_type) {
+	case BLK_INTEGRITY_CSUM_CRC64:
+		return bi->tuple_size == sizeof(struct crc64_pi_tuple);
+	case BLK_INTEGRITY_CSUM_CRC:
+	case BLK_INTEGRITY_CSUM_IP:
+		return bi->tuple_size == sizeof(struct t10_pi_tuple);
+	default:
+		pr_warn_once("%s: unknown integrity checksum type:%d\n",
+			     __func__, bi->csum_type);
+		fallthrough;
+	case BLK_INTEGRITY_CSUM_NONE:
+		return false;
+	}
+}
+
 /**
  * __bio_integrity_endio - Integrity I/O completion function
  * @bio: Protected bio
@@ -54,12 +78,12 @@ static void bio_integrity_verify_fn(struct work_struct *work)
  */
 bool __bio_integrity_endio(struct bio *bio)
 {
-	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
 	struct bio_integrity_payload *bip = bio_integrity(bio);
 	struct bio_integrity_data *bid =
 		container_of(bip, struct bio_integrity_data, bip);
 
-	if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bi->csum_type) {
+	if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
+	    bip_should_check(bip)) {
 		INIT_WORK(&bid->work, bio_integrity_verify_fn);
 		queue_work(kintegrityd_wq, &bid->work);
 		return false;
@@ -84,6 +108,7 @@ bool bio_integrity_prep(struct bio *bio)
 {
 	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
 	struct bio_integrity_data *bid;
+	bool set_flags = true;
 	gfp_t gfp = GFP_NOIO;
 	unsigned int len;
 	void *buf;
@@ -100,19 +125,24 @@ bool bio_integrity_prep(struct bio *bio)
 	switch (bio_op(bio)) {
 	case REQ_OP_READ:
-		if (bi->flags & BLK_INTEGRITY_NOVERIFY)
-			return true;
+		if (bi->flags & BLK_INTEGRITY_NOVERIFY) {
+			if (bi_offload_capable(bi))
+				return true;
+			set_flags = false;
+		}
 		break;
 	case REQ_OP_WRITE:
-		if (bi->flags & BLK_INTEGRITY_NOGENERATE)
-			return true;
-
 		/*
 		 * Zero the memory allocated to not leak uninitialized kernel
 		 * memory to disk for non-integrity metadata where nothing else
 		 * initializes the memory.
 		 */
-		if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
+		if (bi->flags & BLK_INTEGRITY_NOGENERATE) {
+			if (bi_offload_capable(bi))
+				return true;
+			set_flags = false;
+			gfp |= __GFP_ZERO;
+		} else if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
 			gfp |= __GFP_ZERO;
 		break;
 	default:
@@ -137,19 +167,21 @@ bool bio_integrity_prep(struct bio *bio)
 	bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY;
 	bip_set_seed(&bid->bip, bio->bi_iter.bi_sector);
 
-	if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
-		bid->bip.bip_flags |= BIP_IP_CHECKSUM;
-	if (bi->csum_type)
-		bid->bip.bip_flags |= BIP_CHECK_GUARD;
-	if (bi->flags & BLK_INTEGRITY_REF_TAG)
-		bid->bip.bip_flags |= BIP_CHECK_REFTAG;
+	if (set_flags) {
+		if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
+			bid->bip.bip_flags |= BIP_IP_CHECKSUM;
+		if (bi->csum_type)
+			bid->bip.bip_flags |= BIP_CHECK_GUARD;
+		if (bi->flags & BLK_INTEGRITY_REF_TAG)
+			bid->bip.bip_flags |= BIP_CHECK_REFTAG;
+	}
 
 	if (bio_integrity_add_page(bio, virt_to_page(buf), len,
 				   offset_in_page(buf)) < len)
 		goto err_end_io;
 
 	/* Auto-generate integrity metadata if this is a write */
-	if (bio_data_dir(bio) == WRITE)
+	if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip))
 		blk_integrity_generate(bio);
 	else
 		bid->saved_bio_iter = bio->bi_iter;

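In short, bio_integrity_prep() now allocates the integrity buffer even when verification or generation is disabled, and decides whether to verify or generate from the BIP_CHECK_* flags rather than from the checksum type alone. A minimal userspace sketch of that gating logic, with stand-in types and values rather than the kernel's own:

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the kernel's flag bits. */
enum { BIP_CHECK_GUARD = 1, BIP_CHECK_REFTAG = 2, BIP_CHECK_APPTAG = 4 };
#define BIP_CHECK_FLAGS (BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG)

struct bip { unsigned bip_flags; };

static bool bip_should_check(const struct bip *bip)
{
        return bip->bip_flags & BIP_CHECK_FLAGS;
}

int main(void)
{
        /* NOVERIFY/NOGENERATE without offload capability: the buffer is
         * still allocated, but no BIP_CHECK_* flag is set, so the kernel
         * skips verification and generation for this bio. */
        struct bip nocheck = { .bip_flags = 0 };
        struct bip guarded = { .bip_flags = BIP_CHECK_GUARD };

        printf("nocheck: %d, guarded: %d\n",
               bip_should_check(&nocheck), bip_should_check(&guarded));
        return 0;
}
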
drivers/block/ublk_drv.c

@@ -1708,7 +1708,7 @@ static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag,
 	 * that ublk_dispatch_req() is always called
 	 */
 	req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag);
-	if (req && blk_mq_request_started(req))
+	if (req && blk_mq_request_started(req) && req->tag == tag)
 		return;
 
 	spin_lock(&ubq->cancel_lock);

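As the commit subject suggests, blk_mq_tag_to_rq() can hand back a started request that no longer corresponds to the tag being canceled, so the old early return could repeat forever; the added req->tag == tag test makes the cancel path back off only when the started request really owns this tag. A toy simulation of that check, with stand-in types rather than the driver's:

#include <stdbool.h>
#include <stdio.h>

/* Toy model: a request slot whose tag may have been recycled. */
struct request { int tag; bool started; };

/* Stand-in for blk_mq_tag_to_rq(): may return a request whose tag
 * was already reassigned to a different value. */
static struct request *tag_to_rq(struct request *slot, int tag)
{
        (void)tag;
        return slot;
}

/* Mirrors the fixed condition: only back off from canceling when the
 * started request actually owns the tag in question. */
static bool cancel_should_back_off(struct request *slot, int tag)
{
        struct request *req = tag_to_rq(slot, tag);

        return req && req->started && req->tag == tag;
}

int main(void)
{
        struct request slot = { .tag = 7, .started = true };

        /* Without "req->tag == tag", canceling tag 3 would back off
         * forever, since a started request (tag 7) is always found. */
        printf("back off for tag 3? %d\n", cancel_should_back_off(&slot, 3));
        printf("back off for tag 7? %d\n", cancel_should_back_off(&slot, 7));
        return 0;
}
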
drivers/nvme/host/core.c

@@ -2059,7 +2059,21 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
 		if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf)
 			atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
 		else
-			atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
+			atomic_bs = (1 + ns->ctrl->awupf) * bs;
+
+		/*
+		 * Set subsystem atomic bs.
+		 */
+		if (ns->ctrl->subsys->atomic_bs) {
+			if (atomic_bs != ns->ctrl->subsys->atomic_bs) {
+				dev_err_ratelimited(ns->ctrl->device,
+					"%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
+					ns->disk ? ns->disk->disk_name : "?",
+					ns->ctrl->subsys->atomic_bs,
+					atomic_bs);
+			}
+		} else
+			ns->ctrl->subsys->atomic_bs = atomic_bs;
 
 		nvme_update_atomic_write_disk_info(ns, id, lim, bs, atomic_bs);
 	}
@@ -2201,6 +2215,17 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 	nvme_set_chunk_sectors(ns, id, &lim);
 	if (!nvme_update_disk_info(ns, id, &lim))
 		capacity = 0;
+
+	/*
+	 * Validate the max atomic write size fits within the subsystem's
+	 * atomic write capabilities.
+	 */
+	if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) {
+		blk_mq_unfreeze_queue(ns->disk->queue, memflags);
+		ret = -ENXIO;
+		goto out;
+	}
+
 	nvme_config_discard(ns, &lim);
 	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
 	    ns->head->ids.csi == NVME_CSI_ZNS)
@@ -3031,7 +3056,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 		kfree(subsys);
 		return -EINVAL;
 	}
-	subsys->awupf = le16_to_cpu(id->awupf);
 	nvme_mpath_default_iopolicy(subsys);
 
 	subsys->dev.class = &nvme_subsys_class;
@@ -3441,7 +3465,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
 		dev_pm_qos_expose_latency_tolerance(ctrl->device);
 	else if (!ctrl->apst_enabled && prev_apst_enabled)
 		dev_pm_qos_hide_latency_tolerance(ctrl->device);
-
+	ctrl->awupf = le16_to_cpu(id->awupf);
 out_free:
 	kfree(id);
 	return ret;

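AWUPF and NAWUPF are 0's based fields, so the usable atomic write size in bytes is (1 + awupf) * block_size; for example, awupf = 7 with 512-byte blocks gives 4096 bytes. The core change records the first namespace's value as the subsystem-wide atomic_bs and refuses namespaces that disagree. A standalone sketch of the arithmetic and the consistency rule, with stand-in names:

#include <stdio.h>

/* AWUPF/NAWUPF are 0's based: a value of 7 means 8 logical blocks. */
static unsigned atomic_bytes(unsigned awupf, unsigned block_size)
{
        return (1 + awupf) * block_size;
}

int main(void)
{
        unsigned subsys_atomic_bs;
        unsigned ns1 = atomic_bytes(7, 512);    /* 4096 bytes */
        unsigned ns2 = atomic_bytes(3, 512);    /* 2048 bytes: inconsistent */

        subsys_atomic_bs = ns1;                 /* first namespace sets it */
        if (ns2 != subsys_atomic_bs)
                printf("namespace rejected: subsystem=%u, namespace=%u\n",
                       subsys_atomic_bs, ns2);
        return 0;
}
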
drivers/nvme/host/multipath.c

@@ -638,7 +638,8 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 	blk_set_stacking_limits(&lim);
 	lim.dma_alignment = 3;
-	lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL;
+	lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT |
+			BLK_FEAT_POLL | BLK_FEAT_ATOMIC_WRITES;
 	if (head->ids.csi == NVME_CSI_ZNS)
 		lim.features |= BLK_FEAT_ZONED;

drivers/nvme/host/nvme.h

@@ -410,6 +410,7 @@ struct nvme_ctrl {
 	enum nvme_ctrl_type cntrltype;
 	enum nvme_dctype dctype;
+	u16 awupf; /* 0's based value. */
 };
 
 static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
@@ -442,11 +443,11 @@ struct nvme_subsystem {
 	u8			cmic;
 	enum nvme_subsys_type	subtype;
 	u16			vendor_id;
-	u16			awupf;	/* 0's based awupf value. */
 	struct ida		ns_ida;
 #ifdef CONFIG_NVME_MULTIPATH
 	enum nvme_iopolicy	iopolicy;
 #endif
+	u32			atomic_bs;
 };
 
 /*

drivers/nvme/host/pci.c

@@ -390,7 +390,7 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, __le32 *dbbuf_db,
  * as it only leads to a small amount of wasted memory for the lifetime of
  * the I/O.
  */
-static int nvme_pci_npages_prp(void)
+static __always_inline int nvme_pci_npages_prp(void)
 {
 	unsigned max_bytes = (NVME_MAX_KB_SZ * 1024) + NVME_CTRL_PAGE_SIZE;
 	unsigned nprps = DIV_ROUND_UP(max_bytes, NVME_CTRL_PAGE_SIZE);
@@ -1202,7 +1202,9 @@ static void nvme_poll_irqdisable(struct nvme_queue *nvmeq)
 	WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags));
 
 	disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
+	spin_lock(&nvmeq->cq_poll_lock);
 	nvme_poll_cq(nvmeq, NULL);
+	spin_unlock(&nvmeq->cq_poll_lock);
 	enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
 }
@@ -3737,6 +3739,8 @@ static const struct pci_device_id nvme_id_table[] = {
 		.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
 	{ PCI_DEVICE(0x1e49, 0x0041),	/* ZHITAI TiPro7000 NVMe SSD */
 		.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
+	{ PCI_DEVICE(0x025e, 0xf1ac),	/* SOLIDIGM P44 pro SSDPFKKW020X7 */
+		.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
 	{ PCI_DEVICE(0xc0a9, 0x540a),	/* Crucial P2 */
 		.driver_data = NVME_QUIRK_BOGUS_NID, },
 	{ PCI_DEVICE(0x1d97, 0x2263), /* Lexar NM610 */

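The nvme_poll_irqdisable() fix reaps the completion queue under nvmeq->cq_poll_lock, so that concurrent callers cannot process the same CQ at once; disabling the IRQ alone does not serialize against other reapers. A minimal pthread sketch of that pattern (one lock serializing every reap path), not the driver's actual code:

#include <pthread.h>
#include <stdio.h>

/* Toy completion queue: every path that consumes completions must
 * hold the same lock, as the fix does with nvmeq->cq_poll_lock. */
static pthread_mutex_t cq_poll_lock = PTHREAD_MUTEX_INITIALIZER;
static int cq_head;

static void poll_cq(const char *who)
{
        pthread_mutex_lock(&cq_poll_lock);
        cq_head++;              /* consume one completion entry */
        printf("%s reaped entry %d\n", who, cq_head);
        pthread_mutex_unlock(&cq_poll_lock);
}

static void *poller(void *arg)
{
        (void)arg;
        poll_cq("poller");
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, poller, NULL);
        poll_cq("irq-disabled path");   /* safe: same lock as the poller */
        pthread_join(t, NULL);
        return 0;
}

Build with cc -o cq cq.c -lpthread; the two printf lines always report distinct entries because both paths serialize on the one lock.
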
drivers/nvme/target/pci-epf.c

@@ -62,8 +62,7 @@ static DEFINE_MUTEX(nvmet_pci_epf_ports_mutex);
 #define NVMET_PCI_EPF_CQ_RETRY_INTERVAL	msecs_to_jiffies(1)
 
 enum nvmet_pci_epf_queue_flags {
-	NVMET_PCI_EPF_Q_IS_SQ = 0,	/* The queue is a submission queue */
-	NVMET_PCI_EPF_Q_LIVE,		/* The queue is live */
+	NVMET_PCI_EPF_Q_LIVE = 0,	/* The queue is live */
 	NVMET_PCI_EPF_Q_IRQ_ENABLED,	/* IRQ is enabled for this queue */
 };
@@ -596,9 +595,6 @@ static bool nvmet_pci_epf_should_raise_irq(struct nvmet_pci_epf_ctrl *ctrl,
 	struct nvmet_pci_epf_irq_vector *iv = cq->iv;
 	bool ret;
 
-	if (!test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
-		return false;
-
 	/* IRQ coalescing for the admin queue is not allowed. */
 	if (!cq->qid)
 		return true;
@@ -625,7 +621,8 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl,
 	struct pci_epf *epf = nvme_epf->epf;
 	int ret = 0;
 
-	if (!test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags))
+	if (!test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags) ||
+	    !test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
 		return;
 
 	mutex_lock(&ctrl->irq_lock);
@@ -636,14 +633,16 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl,
 	switch (nvme_epf->irq_type) {
 	case PCI_IRQ_MSIX:
 	case PCI_IRQ_MSI:
+		/*
+		 * If we fail to raise an MSI or MSI-X interrupt, it is likely
+		 * because the host is using legacy INTX IRQs (e.g. BIOS,
+		 * grub), but we can fallback to the INTX type only if the
+		 * endpoint controller supports this type.
+		 */
 		ret = pci_epc_raise_irq(epf->epc, epf->func_no, epf->vfunc_no,
 					nvme_epf->irq_type, cq->vector + 1);
-		if (!ret)
+		if (!ret || !nvme_epf->epc_features->intx_capable)
 			break;
-		/*
-		 * If we got an error, it is likely because the host is using
-		 * legacy IRQs (e.g. BIOS, grub).
-		 */
 		fallthrough;
 	case PCI_IRQ_INTX:
 		ret = pci_epc_raise_irq(epf->epc, epf->func_no, epf->vfunc_no,
@@ -656,7 +655,9 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl,
 	}
 
 	if (ret)
-		dev_err(ctrl->dev, "Failed to raise IRQ (err=%d)\n", ret);
+		dev_err_ratelimited(ctrl->dev,
+				    "CQ[%u]: Failed to raise IRQ (err=%d)\n",
+				    cq->qid, ret);
 
 unlock:
 	mutex_unlock(&ctrl->irq_lock);
@@ -1319,8 +1320,14 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
 	set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags);
 
-	dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n",
-		cqid, qsize, cq->qes, cq->vector);
+	if (test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
+		dev_dbg(ctrl->dev,
+			"CQ[%u]: %u entries of %zu B, IRQ vector %u\n",
+			cqid, qsize, cq->qes, cq->vector);
+	else
+		dev_dbg(ctrl->dev,
+			"CQ[%u]: %u entries of %zu B, IRQ disabled\n",
+			cqid, qsize, cq->qes);
 
 	return NVME_SC_SUCCESS;
@@ -1344,7 +1351,8 @@ static u16 nvmet_pci_epf_delete_cq(struct nvmet_ctrl *tctrl, u16 cqid)
 	cancel_delayed_work_sync(&cq->work);
 	nvmet_pci_epf_drain_queue(cq);
-	nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
+	if (test_and_clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
+		nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
 	nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map);
 
 	return NVME_SC_SUCCESS;
@@ -1533,7 +1541,6 @@ static void nvmet_pci_epf_init_queue(struct nvmet_pci_epf_ctrl *ctrl,
 	if (sq) {
 		queue = &ctrl->sq[qid];
-		set_bit(NVMET_PCI_EPF_Q_IS_SQ, &queue->flags);
 	} else {
 		queue = &ctrl->cq[qid];
 		INIT_DELAYED_WORK(&queue->work, nvmet_pci_epf_cq_work);
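
Taken together, the pci-epf changes make nvmet_pci_epf_raise_irq() fall back from MSI/MSI-X to INTX only when the endpoint controller advertises INTX support (epc_features->intx_capable), instead of unconditionally. A standalone sketch of that control flow, with stand-in types and a fake raise function:

#include <stdbool.h>
#include <stdio.h>

enum irq_type { IRQ_MSIX, IRQ_MSI, IRQ_INTX };

struct epc_features { bool intx_capable; };

/* Stand-in for pci_epc_raise_irq(): pretend MSI/MSI-X fails, as it
 * can when the host runs with legacy INTX IRQs (e.g. BIOS, grub). */
static int raise_irq(enum irq_type type)
{
        return type == IRQ_INTX ? 0 : -1;
}

static int raise_cq_irq(enum irq_type type, const struct epc_features *f)
{
        int ret;

        switch (type) {
        case IRQ_MSIX:
        case IRQ_MSI:
                ret = raise_irq(type);
                /* Fall back to INTX only if the controller supports it. */
                if (!ret || !f->intx_capable)
                        return ret;
                /* fall through */
        case IRQ_INTX:
                return raise_irq(IRQ_INTX);
        }
        return -1;
}

int main(void)
{
        struct epc_features with = { .intx_capable = true };
        struct epc_features without = { .intx_capable = false };

        printf("MSI with INTX fallback: %d\n", raise_cq_irq(IRQ_MSI, &with));
        printf("MSI, no INTX support: %d\n", raise_cq_irq(IRQ_MSI, &without));
        return 0;
}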