block-7.0-20260305

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmmqPRMQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgplf5D/9uOsBr+OGXtkLUJtD6MiwoJUsYgYF2dMIx
 epcp+8RdMaOGtigtx69QXzTP5aPjA+AvBLAMYM+QDQDAPMWbRPsD7LaCYHy7ekwA
 OL68R3QRTMYPPgpuf7pKyhif7olozAvoWAnRaoWlo67rbK+mTzZsTIsgTwF4zUu6
 T0dL9thbWqtJMxKSuUk+DywggvGyNZWICJ3rAZ6os2htruH0fPhsJNGVFgNXMnpe
 Cy2OvWxBWRQkZnpDEocZUdYyCRVhHr7hu311j6nSLNXufqpgFmWLGO4C3vetOlgx
 ulEHfGNINcSLcw9R8pNWRxU14V6iw8Oy4nU9RtZhUpF32Iasvxb4H0w76Dp9Ukq1
 /DuoSkWg/Ahn24xSYxJwwZpOEE8L92pn0M2ukCfC6h7ytmDjjEL1AQ2kyFHV4mR3
 nc/3FkQ0abe3HHk8Rit6+txe3sSQo5no1z8kFlb9yp2MwAmonxCCQ9N1s7pxeeP+
 iLaPbGMaZ7Ra1GswD/vzxFQtkglsxLuM5D0JkjHe99a54ZnF0vF3y9jeDVOQbV1C
 H6/bU/2DI3SQ8xqv6tIXQ22reyRen3ao5VKLSrmrT/tDQVoEBV5SMnJFO1J8jBP4
 QST03wiu8ShHSyZ98KefwlsndrTX02V9UVD4FVj+TZXwCWltulnIR4dVYFdySWwW
 d613iUsWJw==
 =NNcQ
 -----END PGP SIGNATURE-----

Merge tag 'block-7.0-20260305' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux

Pull block fixes from Jens Axboe:

 - NVMe pull request via Keith:
      - Improve quirk visibility and configurability (Maurizio)
      - Fix runtime user modification to queue setup (Keith)
      - Fix multipath leak on try_module_get failure (Keith)
      - Ignore ambiguous spec definitions for better atomics support
        (John)
      - Fix admin queue leak on controller reset (Ming)
      - Fix large allocation in persistent reservation read keys
        (Sungwoo Kim)
      - Fix fcloop callback handling (Justin)
      - Securely free DHCHAP secrets (Daniel)
      - Various cleanups and typo fixes (John, Wilfred)

 - Avoid a circular lock dependency issue in the sysfs nr_requests or
   scheduler store handling

 - Fix a circular lock dependency with the pcpu mutex and the queue
   freeze lock

 - Cleanup for bio_copy_kern(), using __bio_add_page() rather than
   bio_add_page(), as adding a page here cannot fail. The existing code
   had broken cleanup for the error condition, so make it clear that the
   error condition cannot happen

 - Fix for a __this_cpu_read() in preemptible context splat

* tag 'block-7.0-20260305' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  block: use trylock to avoid lockdep circular dependency in sysfs
  nvme: fix memory allocation in nvme_pr_read_keys()
  block: use __bio_add_page in bio_copy_kern
  block: break pcpu_alloc_mutex dependency on freeze_lock
  blktrace: fix __this_cpu_read/write in preemptible context
  nvme-multipath: fix leak on try_module_get failure
  nvmet-fcloop: Check remoteport port_state before calling done callback
  nvme-pci: do not try to add queue maps at runtime
  nvme-pci: cap queue creation to used queues
  nvme-pci: ensure we're polling a polled queue
  nvme: fix memory leak in quirks_param_set()
  nvme: correct comment about nvme_ns_remove()
  nvme: stop setting namespace gendisk device driver data
  nvme: add support for dynamic quirk configuration via module parameter
  nvme: fix admin queue leak on controller reset
  nvme-fabrics: use kfree_sensitive() for DHCHAP secrets
  nvme: stop using AWUPF
  nvme: expose active quirks in sysfs
  nvme/host: fixup some typos
This commit is contained in:
Linus Torvalds 2026-03-06 08:36:18 -08:00
commit a028739a43
15 changed files with 362 additions and 58 deletions

View file

@ -74,6 +74,7 @@
TPM TPM drivers are enabled. TPM TPM drivers are enabled.
UMS USB Mass Storage support is enabled. UMS USB Mass Storage support is enabled.
USB USB support is enabled. USB USB support is enabled.
NVME NVMe support is enabled
USBHID USB Human Interface Device support is enabled. USBHID USB Human Interface Device support is enabled.
V4L Video For Linux support is enabled. V4L Video For Linux support is enabled.
VGA The VGA console has been enabled. VGA The VGA console has been enabled.
@ -4787,6 +4788,18 @@ Kernel parameters
This can be set from sysctl after boot. This can be set from sysctl after boot.
See Documentation/admin-guide/sysctl/vm.rst for details. See Documentation/admin-guide/sysctl/vm.rst for details.
nvme.quirks= [NVME] A list of quirk entries to augment the built-in
nvme quirk list. List entries are separated by a
'-' character.
Each entry has the form VendorID:ProductID:quirk_names.
The IDs are 4-digits hex numbers and quirk_names is a
list of quirk names separated by commas. A quirk name
can be prefixed by '^', meaning that the specified
quirk must be disabled.
Example:
nvme.quirks=7710:2267:bogus_nid,^identify_cns-9900:7711:broken_msi
ohci1394_dma=early [HW,EARLY] enable debugging via the ohci1394 driver. ohci1394_dma=early [HW,EARLY] enable debugging via the ohci1394 driver.
See Documentation/core-api/debugging-via-ohci1394.rst for more See Documentation/core-api/debugging-via-ohci1394.rst for more
info. info.

View file

@ -398,8 +398,7 @@ static struct bio *bio_copy_kern(struct request *rq, void *data, unsigned int le
if (op_is_write(op)) if (op_is_write(op))
memcpy(page_address(page), p, bytes); memcpy(page_address(page), p, bytes);
if (bio_add_page(bio, page, bytes, 0) < bytes) __bio_add_page(bio, page, bytes, 0);
break;
len -= bytes; len -= bytes;
p += bytes; p += bytes;

View file

@ -4793,38 +4793,45 @@ static void blk_mq_update_queue_map(struct blk_mq_tag_set *set)
} }
} }
static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set, static struct blk_mq_tags **blk_mq_prealloc_tag_set_tags(
int new_nr_hw_queues) struct blk_mq_tag_set *set,
int new_nr_hw_queues)
{ {
struct blk_mq_tags **new_tags; struct blk_mq_tags **new_tags;
int i; int i;
if (set->nr_hw_queues >= new_nr_hw_queues) if (set->nr_hw_queues >= new_nr_hw_queues)
goto done; return NULL;
new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *), new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *),
GFP_KERNEL, set->numa_node); GFP_KERNEL, set->numa_node);
if (!new_tags) if (!new_tags)
return -ENOMEM; return ERR_PTR(-ENOMEM);
if (set->tags) if (set->tags)
memcpy(new_tags, set->tags, set->nr_hw_queues * memcpy(new_tags, set->tags, set->nr_hw_queues *
sizeof(*set->tags)); sizeof(*set->tags));
kfree(set->tags);
set->tags = new_tags;
for (i = set->nr_hw_queues; i < new_nr_hw_queues; i++) { for (i = set->nr_hw_queues; i < new_nr_hw_queues; i++) {
if (!__blk_mq_alloc_map_and_rqs(set, i)) { if (blk_mq_is_shared_tags(set->flags)) {
while (--i >= set->nr_hw_queues) new_tags[i] = set->shared_tags;
__blk_mq_free_map_and_rqs(set, i); } else {
return -ENOMEM; new_tags[i] = blk_mq_alloc_map_and_rqs(set, i,
set->queue_depth);
if (!new_tags[i])
goto out_unwind;
} }
cond_resched(); cond_resched();
} }
done: return new_tags;
set->nr_hw_queues = new_nr_hw_queues; out_unwind:
return 0; while (--i >= set->nr_hw_queues) {
if (!blk_mq_is_shared_tags(set->flags))
blk_mq_free_map_and_rqs(set, new_tags[i], i);
}
kfree(new_tags);
return ERR_PTR(-ENOMEM);
} }
/* /*
@ -5113,6 +5120,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
unsigned int memflags; unsigned int memflags;
int i; int i;
struct xarray elv_tbl; struct xarray elv_tbl;
struct blk_mq_tags **new_tags;
bool queues_frozen = false; bool queues_frozen = false;
lockdep_assert_held(&set->tag_list_lock); lockdep_assert_held(&set->tag_list_lock);
@ -5147,11 +5155,18 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
if (blk_mq_elv_switch_none(q, &elv_tbl)) if (blk_mq_elv_switch_none(q, &elv_tbl))
goto switch_back; goto switch_back;
new_tags = blk_mq_prealloc_tag_set_tags(set, nr_hw_queues);
if (IS_ERR(new_tags))
goto switch_back;
list_for_each_entry(q, &set->tag_list, tag_set_list) list_for_each_entry(q, &set->tag_list, tag_set_list)
blk_mq_freeze_queue_nomemsave(q); blk_mq_freeze_queue_nomemsave(q);
queues_frozen = true; queues_frozen = true;
if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0) if (new_tags) {
goto switch_back; kfree(set->tags);
set->tags = new_tags;
}
set->nr_hw_queues = nr_hw_queues;
fallback: fallback:
blk_mq_update_queue_map(set); blk_mq_update_queue_map(set);

View file

@ -78,8 +78,14 @@ queue_requests_store(struct gendisk *disk, const char *page, size_t count)
/* /*
* Serialize updating nr_requests with concurrent queue_requests_store() * Serialize updating nr_requests with concurrent queue_requests_store()
* and switching elevator. * and switching elevator.
*
* Use trylock to avoid circular lock dependency with kernfs active
* reference during concurrent disk deletion:
* update_nr_hwq_lock -> kn->active (via del_gendisk -> kobject_del)
* kn->active -> update_nr_hwq_lock (via this sysfs write path)
*/ */
down_write(&set->update_nr_hwq_lock); if (!down_write_trylock(&set->update_nr_hwq_lock))
return -EBUSY;
if (nr == q->nr_requests) if (nr == q->nr_requests)
goto unlock; goto unlock;

View file

@ -807,7 +807,16 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
elv_iosched_load_module(ctx.name); elv_iosched_load_module(ctx.name);
ctx.type = elevator_find_get(ctx.name); ctx.type = elevator_find_get(ctx.name);
down_read(&set->update_nr_hwq_lock); /*
* Use trylock to avoid circular lock dependency with kernfs active
* reference during concurrent disk deletion:
* update_nr_hwq_lock -> kn->active (via del_gendisk -> kobject_del)
* kn->active -> update_nr_hwq_lock (via this sysfs write path)
*/
if (!down_read_trylock(&set->update_nr_hwq_lock)) {
ret = -EBUSY;
goto out;
}
if (!blk_queue_no_elv_switch(q)) { if (!blk_queue_no_elv_switch(q)) {
ret = elevator_change(q, &ctx); ret = elevator_change(q, &ctx);
if (!ret) if (!ret)
@ -817,6 +826,7 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
} }
up_read(&set->update_nr_hwq_lock); up_read(&set->update_nr_hwq_lock);
out:
if (ctx.type) if (ctx.type)
elevator_put(ctx.type); elevator_put(ctx.type);
return ret; return ret;

View file

@ -2046,14 +2046,10 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
if (id->nabspf) if (id->nabspf)
boundary = (le16_to_cpu(id->nabspf) + 1) * bs; boundary = (le16_to_cpu(id->nabspf) + 1) * bs;
} else { } else {
/* if (ns->ctrl->awupf)
* Use the controller wide atomic write unit. This sucks dev_info_once(ns->ctrl->device,
* because the limit is defined in terms of logical blocks while "AWUPF ignored, only NAWUPF accepted\n");
* namespaces can have different formats, and because there is atomic_bs = bs;
* no clear language in the specification prohibiting different
* values for different controllers in the subsystem.
*/
atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
} }
lim->atomic_write_hw_max = atomic_bs; lim->atomic_write_hw_max = atomic_bs;
@ -3222,7 +3218,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
memcpy(subsys->model, id->mn, sizeof(subsys->model)); memcpy(subsys->model, id->mn, sizeof(subsys->model));
subsys->vendor_id = le16_to_cpu(id->vid); subsys->vendor_id = le16_to_cpu(id->vid);
subsys->cmic = id->cmic; subsys->cmic = id->cmic;
subsys->awupf = le16_to_cpu(id->awupf);
/* Versions prior to 1.4 don't necessarily report a valid type */ /* Versions prior to 1.4 don't necessarily report a valid type */
if (id->cntrltype == NVME_CTRL_DISC || if (id->cntrltype == NVME_CTRL_DISC ||
@ -3655,6 +3650,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
dev_pm_qos_expose_latency_tolerance(ctrl->device); dev_pm_qos_expose_latency_tolerance(ctrl->device);
else if (!ctrl->apst_enabled && prev_apst_enabled) else if (!ctrl->apst_enabled && prev_apst_enabled)
dev_pm_qos_hide_latency_tolerance(ctrl->device); dev_pm_qos_hide_latency_tolerance(ctrl->device);
ctrl->awupf = le16_to_cpu(id->awupf);
out_free: out_free:
kfree(id); kfree(id);
return ret; return ret;
@ -4186,13 +4182,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
nvme_mpath_add_disk(ns, info->anagrpid); nvme_mpath_add_disk(ns, info->anagrpid);
nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name); nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
/*
* Set ns->disk->device->driver_data to ns so we can access
* ns->head->passthru_err_log_enabled in
* nvme_io_passthru_err_log_enabled_[store | show]().
*/
dev_set_drvdata(disk_to_dev(ns->disk), ns);
return; return;
out_cleanup_ns_from_list: out_cleanup_ns_from_list:
@ -4865,6 +4854,13 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
if (ret) if (ret)
return ret; return ret;
/*
* If a previous admin queue exists (e.g., from before a reset),
* put it now before allocating a new one to avoid orphaning it.
*/
if (ctrl->admin_q)
blk_put_queue(ctrl->admin_q);
ctrl->admin_q = blk_mq_alloc_queue(set, &lim, NULL); ctrl->admin_q = blk_mq_alloc_queue(set, &lim, NULL);
if (IS_ERR(ctrl->admin_q)) { if (IS_ERR(ctrl->admin_q)) {
ret = PTR_ERR(ctrl->admin_q); ret = PTR_ERR(ctrl->admin_q);

View file

@ -1290,8 +1290,8 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts)
kfree(opts->subsysnqn); kfree(opts->subsysnqn);
kfree(opts->host_traddr); kfree(opts->host_traddr);
kfree(opts->host_iface); kfree(opts->host_iface);
kfree(opts->dhchap_secret); kfree_sensitive(opts->dhchap_secret);
kfree(opts->dhchap_ctrl_secret); kfree_sensitive(opts->dhchap_ctrl_secret);
kfree(opts); kfree(opts);
} }
EXPORT_SYMBOL_GPL(nvmf_free_options); EXPORT_SYMBOL_GPL(nvmf_free_options);

View file

@ -1300,7 +1300,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
mutex_lock(&head->subsys->lock); mutex_lock(&head->subsys->lock);
/* /*
* We are called when all paths have been removed, and at that point * We are called when all paths have been removed, and at that point
* head->list is expected to be empty. However, nvme_remove_ns() and * head->list is expected to be empty. However, nvme_ns_remove() and
* nvme_init_ns_head() can run concurrently and so if head->delayed_ * nvme_init_ns_head() can run concurrently and so if head->delayed_
* removal_secs is configured, it is possible that by the time we reach * removal_secs is configured, it is possible that by the time we reach
* this point, head->list may no longer be empty. Therefore, we recheck * this point, head->list may no longer be empty. Therefore, we recheck
@ -1310,13 +1310,11 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
if (!list_empty(&head->list)) if (!list_empty(&head->list))
goto out; goto out;
if (head->delayed_removal_secs) { /*
/* * Ensure that no one could remove this module while the head
* Ensure that no one could remove this module while the head * remove work is pending.
* remove work is pending. */
*/ if (head->delayed_removal_secs && try_module_get(THIS_MODULE)) {
if (!try_module_get(THIS_MODULE))
goto out;
mod_delayed_work(nvme_wq, &head->remove_work, mod_delayed_work(nvme_wq, &head->remove_work,
head->delayed_removal_secs * HZ); head->delayed_removal_secs * HZ);
} else { } else {

View file

@ -180,6 +180,60 @@ enum nvme_quirks {
NVME_QUIRK_DMAPOOL_ALIGN_512 = (1 << 22), NVME_QUIRK_DMAPOOL_ALIGN_512 = (1 << 22),
}; };
/*
 * Map a single quirk flag to its human-readable name.
 *
 * @q: exactly one NVME_QUIRK_* bit (not a mask of several bits).
 *
 * Returns a static string naming the quirk, or "unknown" for any value
 * that does not correspond to a defined quirk bit.  These names are
 * exposed through the sysfs "quirks" attribute and accepted by the
 * nvme.quirks= module parameter, so they are user-visible — renaming
 * one changes that interface.
 */
static inline char *nvme_quirk_name(enum nvme_quirks q)
{
	switch (q) {
	case NVME_QUIRK_STRIPE_SIZE:
		return "stripe_size";
	case NVME_QUIRK_IDENTIFY_CNS:
		return "identify_cns";
	case NVME_QUIRK_DEALLOCATE_ZEROES:
		return "deallocate_zeroes";
	case NVME_QUIRK_DELAY_BEFORE_CHK_RDY:
		return "delay_before_chk_rdy";
	case NVME_QUIRK_NO_APST:
		return "no_apst";
	case NVME_QUIRK_NO_DEEPEST_PS:
		return "no_deepest_ps";
	case NVME_QUIRK_QDEPTH_ONE:
		return "qdepth_one";
	case NVME_QUIRK_MEDIUM_PRIO_SQ:
		return "medium_prio_sq";
	case NVME_QUIRK_IGNORE_DEV_SUBNQN:
		return "ignore_dev_subnqn";
	case NVME_QUIRK_DISABLE_WRITE_ZEROES:
		return "disable_write_zeroes";
	case NVME_QUIRK_SIMPLE_SUSPEND:
		return "simple_suspend";
	case NVME_QUIRK_SINGLE_VECTOR:
		return "single_vector";
	case NVME_QUIRK_128_BYTES_SQES:
		return "128_bytes_sqes";
	case NVME_QUIRK_SHARED_TAGS:
		return "shared_tags";
	case NVME_QUIRK_NO_TEMP_THRESH_CHANGE:
		return "no_temp_thresh_change";
	case NVME_QUIRK_NO_NS_DESC_LIST:
		return "no_ns_desc_list";
	case NVME_QUIRK_DMA_ADDRESS_BITS_48:
		return "dma_address_bits_48";
	case NVME_QUIRK_SKIP_CID_GEN:
		return "skip_cid_gen";
	case NVME_QUIRK_BOGUS_NID:
		return "bogus_nid";
	case NVME_QUIRK_NO_SECONDARY_TEMP_THRESH:
		return "no_secondary_temp_thresh";
	case NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND:
		return "force_no_simple_suspend";
	case NVME_QUIRK_BROKEN_MSI:
		return "broken_msi";
	case NVME_QUIRK_DMAPOOL_ALIGN_512:
		return "dmapool_align_512";
	}
	/* Callers treat this sentinel as "no such quirk". */
	return "unknown";
}
/* /*
* Common request structure for NVMe passthrough. All drivers must have * Common request structure for NVMe passthrough. All drivers must have
* this structure as the first member of their request-private data. * this structure as the first member of their request-private data.
@ -410,6 +464,8 @@ struct nvme_ctrl {
enum nvme_ctrl_type cntrltype; enum nvme_ctrl_type cntrltype;
enum nvme_dctype dctype; enum nvme_dctype dctype;
u16 awupf; /* 0's based value. */
}; };
static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl) static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
@ -442,7 +498,6 @@ struct nvme_subsystem {
u8 cmic; u8 cmic;
enum nvme_subsys_type subtype; enum nvme_subsys_type subtype;
u16 vendor_id; u16 vendor_id;
u16 awupf; /* 0's based value. */
struct ida ns_ida; struct ida ns_ida;
#ifdef CONFIG_NVME_MULTIPATH #ifdef CONFIG_NVME_MULTIPATH
enum nvme_iopolicy iopolicy; enum nvme_iopolicy iopolicy;

View file

@ -72,6 +72,13 @@
static_assert(MAX_PRP_RANGE / NVME_CTRL_PAGE_SIZE <= static_assert(MAX_PRP_RANGE / NVME_CTRL_PAGE_SIZE <=
(1 /* prp1 */ + NVME_MAX_NR_DESCRIPTORS * PRPS_PER_PAGE)); (1 /* prp1 */ + NVME_MAX_NR_DESCRIPTORS * PRPS_PER_PAGE));
/*
 * One user-supplied quirk override from the nvme.quirks= module
 * parameter: a PCI vendor/device pair plus the quirk bits to force on
 * or force off for matching controllers.
 */
struct quirk_entry {
	u16 vendor_id;
	u16 dev_id;
	u32 enabled_quirks;	/* NVME_QUIRK_* bits to set */
	u32 disabled_quirks;	/* NVME_QUIRK_* bits ('^'-prefixed) to clear */
};
static int use_threaded_interrupts; static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0444); module_param(use_threaded_interrupts, int, 0444);
@ -102,6 +109,143 @@ static unsigned int io_queue_depth = 1024;
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644); module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2 and < 4096"); MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2 and < 4096");
/* Table of user-supplied quirk overrides from the nvme.quirks= parameter. */
static struct quirk_entry *nvme_pci_quirk_list;
static unsigned int nvme_pci_quirk_count;

/*
 * Parse a comma-separated list of quirk names into @entry.
 *
 * Each name may be prefixed with '^' to request that the quirk be
 * disabled rather than enabled.  @quirk_str is consumed destructively
 * via strsep().  Returns 0 on success or -EINVAL on the first name
 * that does not match any known quirk.
 */
static int nvme_parse_quirk_names(char *quirk_str, struct quirk_entry *entry)
{
	char *p = quirk_str, *field;
	bool disabled, found;
	int i;

	while ((field = strsep(&p, ",")) && *field) {
		disabled = false;
		found = false;

		if (*field == '^') {
			/* '^name' means "force this quirk off". */
			disabled = true;
			field++;
		}

		for (i = 0; i < 32; i++) {
			unsigned int bit = 1U << i;
			char *q_name = nvme_quirk_name(bit);

			/*
			 * "unknown" marks an undefined bit; quirk bits are
			 * allocated contiguously, so no names exist past it.
			 */
			if (!strcmp(q_name, "unknown"))
				break;

			/*
			 * strcmp() equality already implies equal length,
			 * so no separate strlen() comparison is needed.
			 */
			if (!strcmp(q_name, field)) {
				if (disabled)
					entry->disabled_quirks |= bit;
				else
					entry->enabled_quirks |= bit;
				found = true;
				break;
			}
		}

		if (!found) {
			pr_err("nvme: unrecognized quirk %s\n", field);
			return -EINVAL;
		}
	}

	return 0;
}
/*
 * Parse one "VID:DID:quirk_names" entry into @entry.  The two IDs are
 * hexadecimal; the trailing field is handed to nvme_parse_quirk_names().
 * @s is consumed destructively.  Returns 0 on success, -EINVAL on any
 * missing or malformed field.
 */
static int nvme_parse_quirk_entry(char *s, struct quirk_entry *entry)
{
	char *tok;

	tok = strsep(&s, ":");
	if (!tok || kstrtou16(tok, 16, &entry->vendor_id))
		return -EINVAL;

	tok = strsep(&s, ":");
	if (!tok || kstrtou16(tok, 16, &entry->dev_id))
		return -EINVAL;

	tok = strsep(&s, ":");
	if (!tok)
		return -EINVAL;

	return nvme_parse_quirk_names(tok, entry);
}
static int quirks_param_set(const char *value, const struct kernel_param *kp)
{
int count, err, i;
struct quirk_entry *qlist;
char *field, *val, *sep_ptr;
err = param_set_copystring(value, kp);
if (err)
return err;
val = kstrdup(value, GFP_KERNEL);
if (!val)
return -ENOMEM;
if (!*val)
goto out_free_val;
count = 1;
for (i = 0; val[i]; i++) {
if (val[i] == '-')
count++;
}
qlist = kcalloc(count, sizeof(*qlist), GFP_KERNEL);
if (!qlist) {
err = -ENOMEM;
goto out_free_val;
}
i = 0;
sep_ptr = val;
while ((field = strsep(&sep_ptr, "-"))) {
if (nvme_parse_quirk_entry(field, &qlist[i])) {
pr_err("nvme: failed to parse quirk string %s\n",
value);
goto out_free_qlist;
}
i++;
}
kfree(nvme_pci_quirk_list);
nvme_pci_quirk_count = count;
nvme_pci_quirk_list = qlist;
goto out_free_val;
out_free_qlist:
kfree(qlist);
out_free_val:
kfree(val);
return err;
}
/* Backing storage for the raw nvme.quirks= string (echoed via sysfs). */
static char quirks_param[128];

static const struct kernel_param_ops quirks_param_ops = {
	.set = quirks_param_set,	/* parse and install the quirk table */
	.get = param_get_string,	/* report the raw string back */
};

static struct kparam_string quirks_param_string = {
	.maxlen = sizeof(quirks_param),
	.string = quirks_param,
};

/* 0444: readable in sysfs, settable only on the kernel command line. */
module_param_cb(quirks, &quirks_param_ops, &quirks_param_string, 0444);
MODULE_PARM_DESC(quirks, "Enable/disable NVMe quirks by specifying "
		 "quirks=VID:DID:quirk_names");
static int io_queue_count_set(const char *val, const struct kernel_param *kp) static int io_queue_count_set(const char *val, const struct kernel_param *kp)
{ {
unsigned int n; unsigned int n;
@ -1496,7 +1640,8 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
struct nvme_queue *nvmeq = hctx->driver_data; struct nvme_queue *nvmeq = hctx->driver_data;
bool found; bool found;
if (!nvme_cqe_pending(nvmeq)) if (!test_bit(NVMEQ_POLLED, &nvmeq->flags) ||
!nvme_cqe_pending(nvmeq))
return 0; return 0;
spin_lock(&nvmeq->cq_poll_lock); spin_lock(&nvmeq->cq_poll_lock);
@ -2774,7 +2919,25 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
dev->nr_write_queues = write_queues; dev->nr_write_queues = write_queues;
dev->nr_poll_queues = poll_queues; dev->nr_poll_queues = poll_queues;
nr_io_queues = dev->nr_allocated_queues - 1; if (dev->ctrl.tagset) {
/*
* The set's maps are allocated only once at initialization
* time. We can't add special queues later if their mq_map
* wasn't preallocated.
*/
if (dev->ctrl.tagset->nr_maps < 3)
dev->nr_poll_queues = 0;
if (dev->ctrl.tagset->nr_maps < 2)
dev->nr_write_queues = 0;
}
/*
* The initial number of allocated queue slots may be too large if the
* user reduced the special queue parameters. Cap the value to the
* number we need for this round.
*/
nr_io_queues = min(nvme_max_io_queues(dev),
dev->nr_allocated_queues - 1);
result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
if (result < 0) if (result < 0)
return result; return result;
@ -3458,12 +3621,25 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
return 0; return 0;
} }
/*
 * Look up a user-supplied quirk override (nvme.quirks=) matching this
 * PCI device's vendor/device IDs.
 *
 * Returns the matching entry, or NULL when none was configured.  Uses
 * an unsigned loop counter to match nvme_pci_quirk_count's type and
 * avoid a signed/unsigned comparison.
 */
static struct quirk_entry *detect_dynamic_quirks(struct pci_dev *pdev)
{
	unsigned int i;

	for (i = 0; i < nvme_pci_quirk_count; i++) {
		if (pdev->vendor == nvme_pci_quirk_list[i].vendor_id &&
		    pdev->device == nvme_pci_quirk_list[i].dev_id)
			return &nvme_pci_quirk_list[i];
	}

	return NULL;
}
static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
const struct pci_device_id *id) const struct pci_device_id *id)
{ {
unsigned long quirks = id->driver_data; unsigned long quirks = id->driver_data;
int node = dev_to_node(&pdev->dev); int node = dev_to_node(&pdev->dev);
struct nvme_dev *dev; struct nvme_dev *dev;
struct quirk_entry *qentry;
int ret = -ENOMEM; int ret = -ENOMEM;
dev = kzalloc_node(struct_size(dev, descriptor_pools, nr_node_ids), dev = kzalloc_node(struct_size(dev, descriptor_pools, nr_node_ids),
@ -3495,6 +3671,11 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
"platform quirk: setting simple suspend\n"); "platform quirk: setting simple suspend\n");
quirks |= NVME_QUIRK_SIMPLE_SUSPEND; quirks |= NVME_QUIRK_SIMPLE_SUSPEND;
} }
qentry = detect_dynamic_quirks(pdev);
if (qentry) {
quirks |= qentry->enabled_quirks;
quirks &= ~qentry->disabled_quirks;
}
ret = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, ret = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
quirks); quirks);
if (ret) if (ret)
@ -4095,6 +4276,7 @@ static int __init nvme_init(void)
static void __exit nvme_exit(void) static void __exit nvme_exit(void)
{ {
kfree(nvme_pci_quirk_list);
pci_unregister_driver(&nvme_driver); pci_unregister_driver(&nvme_driver);
flush_workqueue(nvme_wq); flush_workqueue(nvme_wq);
} }

View file

@ -242,7 +242,7 @@ static int nvme_pr_read_keys(struct block_device *bdev,
if (rse_len > U32_MAX) if (rse_len > U32_MAX)
return -EINVAL; return -EINVAL;
rse = kzalloc(rse_len, GFP_KERNEL); rse = kvzalloc(rse_len, GFP_KERNEL);
if (!rse) if (!rse)
return -ENOMEM; return -ENOMEM;
@ -267,7 +267,7 @@ static int nvme_pr_read_keys(struct block_device *bdev,
} }
free_rse: free_rse:
kfree(rse); kvfree(rse);
return ret; return ret;
} }

View file

@ -601,6 +601,28 @@ static ssize_t dctype_show(struct device *dev,
} }
static DEVICE_ATTR_RO(dctype); static DEVICE_ATTR_RO(dctype);
/*
 * sysfs: list the controller's active quirks, one name per line, or
 * "none" when no quirk bits are set.
 */
static ssize_t quirks_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	int count = 0, i;
	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
	unsigned long quirks = ctrl->quirks;

	if (!quirks)
		return sysfs_emit(buf, "none\n");

	/* Shift the copy right each pass so the loop ends at the last set bit. */
	for (i = 0; quirks; ++i) {
		if (quirks & 1) {
			count += sysfs_emit_at(buf, count, "%s\n",
					       nvme_quirk_name(BIT(i)));
		}
		quirks >>= 1;
	}
	return count;
}
static DEVICE_ATTR_RO(quirks);
#ifdef CONFIG_NVME_HOST_AUTH #ifdef CONFIG_NVME_HOST_AUTH
static ssize_t nvme_ctrl_dhchap_secret_show(struct device *dev, static ssize_t nvme_ctrl_dhchap_secret_show(struct device *dev,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
@ -742,6 +764,7 @@ static struct attribute *nvme_dev_attrs[] = {
&dev_attr_kato.attr, &dev_attr_kato.attr,
&dev_attr_cntrltype.attr, &dev_attr_cntrltype.attr,
&dev_attr_dctype.attr, &dev_attr_dctype.attr,
&dev_attr_quirks.attr,
#ifdef CONFIG_NVME_HOST_AUTH #ifdef CONFIG_NVME_HOST_AUTH
&dev_attr_dhchap_secret.attr, &dev_attr_dhchap_secret.attr,
&dev_attr_dhchap_ctrl_secret.attr, &dev_attr_dhchap_ctrl_secret.attr,

View file

@ -25,7 +25,8 @@
struct nvme_tcp_queue; struct nvme_tcp_queue;
/* Define the socket priority to use for connections were it is desirable /*
* Define the socket priority to use for connections where it is desirable
* that the NIC consider performing optimized packet processing or filtering. * that the NIC consider performing optimized packet processing or filtering.
* A non-zero value being sufficient to indicate general consideration of any * A non-zero value being sufficient to indicate general consideration of any
* possible optimization. Making it a module param allows for alternative * possible optimization. Making it a module param allows for alternative
@ -926,7 +927,7 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
req->curr_bio = req->curr_bio->bi_next; req->curr_bio = req->curr_bio->bi_next;
/* /*
* If we don`t have any bios it means that controller * If we don't have any bios it means the controller
* sent more data than we requested, hence error * sent more data than we requested, hence error
*/ */
if (!req->curr_bio) { if (!req->curr_bio) {

View file

@ -491,6 +491,7 @@ fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport,
struct fcloop_rport *rport = remoteport->private; struct fcloop_rport *rport = remoteport->private;
struct nvmet_fc_target_port *targetport = rport->targetport; struct nvmet_fc_target_port *targetport = rport->targetport;
struct fcloop_tport *tport; struct fcloop_tport *tport;
int ret = 0;
if (!targetport) { if (!targetport) {
/* /*
@ -500,12 +501,18 @@ fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport,
* We end up here from delete association exchange: * We end up here from delete association exchange:
* nvmet_fc_xmt_disconnect_assoc sends an async request. * nvmet_fc_xmt_disconnect_assoc sends an async request.
* *
* Return success because this is what LLDDs do; silently * Return success when remoteport is still online because this
* drop the response. * is what LLDDs do and silently drop the response. Otherwise,
* return with error to signal upper layer to perform the lsrsp
* resource cleanup.
*/ */
lsrsp->done(lsrsp); if (remoteport->port_state == FC_OBJSTATE_ONLINE)
lsrsp->done(lsrsp);
else
ret = -ENODEV;
kmem_cache_free(lsreq_cache, tls_req); kmem_cache_free(lsreq_cache, tls_req);
return 0; return ret;
} }
memcpy(lsreq->rspaddr, lsrsp->rspbuf, memcpy(lsreq->rspaddr, lsrsp->rspbuf,

View file

@ -383,8 +383,6 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
cpu = raw_smp_processor_id(); cpu = raw_smp_processor_id();
if (blk_tracer) { if (blk_tracer) {
tracing_record_cmdline(current);
buffer = blk_tr->array_buffer.buffer; buffer = blk_tr->array_buffer.buffer;
trace_ctx = tracing_gen_ctx_flags(0); trace_ctx = tracing_gen_ctx_flags(0);
switch (bt->version) { switch (bt->version) {
@ -419,6 +417,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
if (!event) if (!event)
return; return;
tracing_record_cmdline(current);
switch (bt->version) { switch (bt->version) {
case 1: case 1:
record_blktrace_event(ring_buffer_event_data(event), record_blktrace_event(ring_buffer_event_data(event),