mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 03:44:45 +01:00
-----BEGIN PGP SIGNATURE-----
iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmmGPZwQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpjy5EAC8z4IFCz+ua+q3hqJIlGfTlkxR6kM+DMn/
WKqaFYjnwzwApYe7kgBtlVcINnX5riCdNEk70tG1SCkAHqqdnzF4Ps1kQz0RflXS
7DftN76hSTUbEfolQWTzqDAGMrcn7GUjjjwaRKjSVF30UBKjZ6U4fKfyzWChEwah
UtnmLMd3Osl58C9RTcjQPN1qMeQagmLej9C8plyCu9iLauoLA8XlkjxWvXRCcYwc
L+IY9F0s1rxmjGZ3eeaevs7V59RjOwJZvL4EPICajkx3oE7EAxS3VVt0p9LC3tPD
F4U6SXL0UkIeinduKlbEGP17N6l/4a4Twetyu6rSu//APzKIPAOPeD2xqIbrNSlI
rxHqKCsI8KW5JfNTvo9+JjiDOeDxRwt19ZCVCFUzXcsNfRq0EljtuY/4V5P1tPr9
0rOe5SdYS94AncwrabeV/ZOLEGmujjY9YhsCcP3J49LDkFG+T3fBgCpmFWwlWLs7
92MUHVcQmvb+j0z/fZVWRsqzhqtHBG4SO4yg2+Q0RQZeWnsVNTOR5cWfUEShI9G+
hnfYLdyyBTy37n60WXJOq2VhiWbPDAetEjKr+ulbD9hvpPdh6QL7rFiWZsVlnc7V
wUQoUjNltfHlyPI/YSwqa9YyyLPAl6YGKba2/qBKSwFTQmFLpSynJIa87W6jUx6B
sofywm9ZZw==
=faTj
-----END PGP SIGNATURE-----
Merge tag 'for-7.0/block-stable-pages-20260206' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull bounce buffer dio for stable pages from Jens Axboe:
"This adds support for bounce buffering of dio for stable pages. This
was all done by Christoph. In his words:
This series tries to address the problem that under I/O pages can be
modified during direct I/O, even when the device or file system
require stable pages during I/O to calculate checksums, parity or data
operations. It does so by adding block layer helpers to bounce buffer
an iov_iter into a bio, then wires that up in iomap and ultimately
XFS.
The reason that the file system even needs to know about it, is
because reads need a user context to copy the data back, and the
infrastructure to defer ioends to a workqueue currently sits in XFS.
I'm going to look into moving that into ioend and enabling it for
other file systems. Additionally btrfs already has its own
infrastructure for this, and actually an urgent need to bounce buffer,
so this should be useful there and could be wired up easily. In fact
the idea comes from patches by Qu that did this in btrfs.
This patch fixes all but one xfstests failures on T10 PI capable
devices (generic/095 seems to have issues with a mix of mmap and
splice still, I'm looking into that separately), and makes qemu VMs
running Windows, or Linux with swap enabled fine on an XFS file on a
device using PI.
Performance numbers on my (not exactly state of the art) NVMe PI test
setup:
Sequential reads using io_uring, QD=16.
Bandwidth and CPU usage (usr/sys):
| size | zero copy | bounce |
+------+--------------------------+--------------------------+
| 4k | 1316MiB/s (12.65/55.40%) | 1081MiB/s (11.76/49.78%) |
| 64K | 3370MiB/s ( 5.46/18.20%) | 3365MiB/s ( 4.47/15.68%) |
| 1M | 3401MiB/s ( 0.76/23.05%) | 3400MiB/s ( 0.80/09.06%) |
+------+--------------------------+--------------------------+
Sequential writes using io_uring, QD=16.
Bandwidth and CPU usage (usr/sys):
| size | zero copy | bounce |
+------+--------------------------+--------------------------+
| 4k | 882MiB/s (11.83/33.88%) | 750MiB/s (10.53/34.08%) |
| 64K | 2009MiB/s ( 7.33/15.80%) | 2007MiB/s ( 7.47/24.71%) |
| 1M | 1992MiB/s ( 7.26/ 9.13%) | 1992MiB/s ( 9.21/19.11%) |
+------+--------------------------+--------------------------+
Note that the 64k read numbers look really odd to me for the baseline
zero copy case, but are reproducible over many repeated runs.
The bounce read numbers should further improve when moving the PI
validation to the file system and removing the double context switch,
which I have patches for that will be sent out soon"
* tag 'for-7.0/block-stable-pages-20260206' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
xfs: use bounce buffering direct I/O when the device requires stable pages
iomap: add a flag to bounce buffer direct I/O
iomap: support ioends for direct reads
iomap: rename IOMAP_DIO_DIRTY to IOMAP_DIO_USER_BACKED
iomap: free the bio before completing the dio
iomap: share code between iomap_dio_bio_end_io and iomap_finish_ioend_direct
iomap: split out the per-bio logic from iomap_dio_bio_iter
iomap: simplify iomap_dio_bio_iter
iomap: fix submission side handling of completion side errors
block: add helpers to bounce buffer an iov_iter into bios
block: remove bio_release_page
iov_iter: extract a iov_iter_extract_bvecs helper from bio code
block: open code bio_add_page and fix handling of mismatching P2P ranges
block: refactor get_contig_folio_len
block: add a BIO_MAX_SIZE constant and use it
1171 lines
32 KiB
C
1171 lines
32 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Functions related to segment and merge handling
|
|
*/
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/bio.h>
|
|
#include <linux/blkdev.h>
|
|
#include <linux/blk-integrity.h>
|
|
#include <linux/part_stat.h>
|
|
#include <linux/blk-cgroup.h>
|
|
|
|
#include <trace/events/block.h>
|
|
|
|
#include "blk.h"
|
|
#include "blk-mq-sched.h"
|
|
#include "blk-rq-qos.h"
|
|
#include "blk-throttle.h"
|
|
|
|
static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
|
|
{
|
|
*bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
|
|
}
|
|
|
|
static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
|
|
{
|
|
struct bvec_iter iter = bio->bi_iter;
|
|
int idx;
|
|
|
|
bio_get_first_bvec(bio, bv);
|
|
if (bv->bv_len == bio->bi_iter.bi_size)
|
|
return; /* this bio only has a single bvec */
|
|
|
|
bio_advance_iter(bio, &iter, iter.bi_size);
|
|
|
|
if (!iter.bi_bvec_done)
|
|
idx = iter.bi_idx - 1;
|
|
else /* in the middle of bvec */
|
|
idx = iter.bi_idx;
|
|
|
|
*bv = bio->bi_io_vec[idx];
|
|
|
|
/*
|
|
* iter.bi_bvec_done records actual length of the last bvec
|
|
* if this bio ends in the middle of one io vector
|
|
*/
|
|
if (iter.bi_bvec_done)
|
|
bv->bv_len = iter.bi_bvec_done;
|
|
}
|
|
|
|
static inline bool bio_will_gap(struct request_queue *q,
|
|
struct request *prev_rq, struct bio *prev, struct bio *next)
|
|
{
|
|
struct bio_vec pb, nb;
|
|
|
|
if (!bio_has_data(prev) || !queue_virt_boundary(q))
|
|
return false;
|
|
|
|
/*
|
|
* Don't merge if the 1st bio starts with non-zero offset, otherwise it
|
|
* is quite difficult to respect the sg gap limit. We work hard to
|
|
* merge a huge number of small single bios in case of mkfs.
|
|
*/
|
|
if (prev_rq)
|
|
bio_get_first_bvec(prev_rq->bio, &pb);
|
|
else
|
|
bio_get_first_bvec(prev, &pb);
|
|
if (pb.bv_offset & queue_virt_boundary(q))
|
|
return true;
|
|
|
|
/*
|
|
* We don't need to worry about the situation that the merged segment
|
|
* ends in unaligned virt boundary:
|
|
*
|
|
* - if 'pb' ends aligned, the merged segment ends aligned
|
|
* - if 'pb' ends unaligned, the next bio must include
|
|
* one single bvec of 'nb', otherwise the 'nb' can't
|
|
* merge with 'pb'
|
|
*/
|
|
bio_get_last_bvec(prev, &pb);
|
|
bio_get_first_bvec(next, &nb);
|
|
if (biovec_phys_mergeable(q, &pb, &nb))
|
|
return false;
|
|
return __bvec_gap_to_prev(&q->limits, &pb, nb.bv_offset);
|
|
}
|
|
|
|
static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
|
|
{
|
|
return bio_will_gap(req->q, req, req->biotail, bio);
|
|
}
|
|
|
|
static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
|
|
{
|
|
return bio_will_gap(req->q, NULL, bio, req->bio);
|
|
}
|
|
|
|
/*
|
|
* The maximum size that a bio can fit has to be aligned down to the
|
|
* logical block size, which is the minimum accepted unit by hardware.
|
|
*/
|
|
static unsigned int bio_allowed_max_sectors(const struct queue_limits *lim)
|
|
{
|
|
return round_down(BIO_MAX_SIZE, lim->logical_block_size) >>
|
|
SECTOR_SHIFT;
|
|
}
|
|
|
|
/*
|
|
* bio_submit_split_bioset - Submit a bio, splitting it at a designated sector
|
|
* @bio: the original bio to be submitted and split
|
|
* @split_sectors: the sector count at which to split
|
|
* @bs: the bio set used for allocating the new split bio
|
|
*
|
|
* The original bio is modified to contain the remaining sectors and submitted.
|
|
* The caller is responsible for submitting the returned bio.
|
|
*
|
|
* If succeed, the newly allocated bio representing the initial part will be
|
|
* returned, on failure NULL will be returned and original bio will fail.
|
|
*/
|
|
struct bio *bio_submit_split_bioset(struct bio *bio, unsigned int split_sectors,
|
|
struct bio_set *bs)
|
|
{
|
|
struct bio *split = bio_split(bio, split_sectors, GFP_NOIO, bs);
|
|
|
|
if (IS_ERR(split)) {
|
|
bio->bi_status = errno_to_blk_status(PTR_ERR(split));
|
|
bio_endio(bio);
|
|
return NULL;
|
|
}
|
|
|
|
bio_chain(split, bio);
|
|
trace_block_split(split, bio->bi_iter.bi_sector);
|
|
WARN_ON_ONCE(bio_zone_write_plugging(bio));
|
|
|
|
if (should_fail_bio(bio))
|
|
bio_io_error(bio);
|
|
else if (!blk_throtl_bio(bio))
|
|
submit_bio_noacct_nocheck(bio, true);
|
|
|
|
return split;
|
|
}
|
|
EXPORT_SYMBOL_GPL(bio_submit_split_bioset);
|
|
|
|
static struct bio *bio_submit_split(struct bio *bio, int split_sectors)
|
|
{
|
|
if (unlikely(split_sectors < 0)) {
|
|
bio->bi_status = errno_to_blk_status(split_sectors);
|
|
bio_endio(bio);
|
|
return NULL;
|
|
}
|
|
|
|
if (split_sectors) {
|
|
bio = bio_submit_split_bioset(bio, split_sectors,
|
|
&bio->bi_bdev->bd_disk->bio_split);
|
|
if (bio)
|
|
bio->bi_opf |= REQ_NOMERGE;
|
|
}
|
|
|
|
return bio;
|
|
}
|
|
|
|
static struct bio *__bio_split_discard(struct bio *bio,
|
|
const struct queue_limits *lim, unsigned *nsegs,
|
|
unsigned int max_sectors)
|
|
{
|
|
unsigned int max_discard_sectors, granularity;
|
|
sector_t tmp;
|
|
unsigned split_sectors;
|
|
|
|
*nsegs = 1;
|
|
|
|
granularity = max(lim->discard_granularity >> 9, 1U);
|
|
|
|
max_discard_sectors = min(max_sectors, bio_allowed_max_sectors(lim));
|
|
max_discard_sectors -= max_discard_sectors % granularity;
|
|
if (unlikely(!max_discard_sectors))
|
|
return bio;
|
|
|
|
if (bio_sectors(bio) <= max_discard_sectors)
|
|
return bio;
|
|
|
|
split_sectors = max_discard_sectors;
|
|
|
|
/*
|
|
* If the next starting sector would be misaligned, stop the discard at
|
|
* the previous aligned sector.
|
|
*/
|
|
tmp = bio->bi_iter.bi_sector + split_sectors -
|
|
((lim->discard_alignment >> 9) % granularity);
|
|
tmp = sector_div(tmp, granularity);
|
|
|
|
if (split_sectors > tmp)
|
|
split_sectors -= tmp;
|
|
|
|
return bio_submit_split(bio, split_sectors);
|
|
}
|
|
|
|
struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
|
|
unsigned *nsegs)
|
|
{
|
|
unsigned int max_sectors;
|
|
|
|
if (bio_op(bio) == REQ_OP_SECURE_ERASE)
|
|
max_sectors = lim->max_secure_erase_sectors;
|
|
else
|
|
max_sectors = lim->max_discard_sectors;
|
|
|
|
return __bio_split_discard(bio, lim, nsegs, max_sectors);
|
|
}
|
|
|
|
static inline unsigned int blk_boundary_sectors(const struct queue_limits *lim,
|
|
bool is_atomic)
|
|
{
|
|
/*
|
|
* chunk_sectors must be a multiple of atomic_write_boundary_sectors if
|
|
* both non-zero.
|
|
*/
|
|
if (is_atomic && lim->atomic_write_boundary_sectors)
|
|
return lim->atomic_write_boundary_sectors;
|
|
|
|
return lim->chunk_sectors;
|
|
}
|
|
|
|
/*
|
|
* Return the maximum number of sectors from the start of a bio that may be
|
|
* submitted as a single request to a block device. If enough sectors remain,
|
|
* align the end to the physical block size. Otherwise align the end to the
|
|
* logical block size. This approach minimizes the number of non-aligned
|
|
* requests that are submitted to a block device if the start of a bio is not
|
|
* aligned to a physical block boundary.
|
|
*/
|
|
static inline unsigned get_max_io_size(struct bio *bio,
|
|
const struct queue_limits *lim)
|
|
{
|
|
unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
|
|
unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
|
|
bool is_atomic = bio->bi_opf & REQ_ATOMIC;
|
|
unsigned boundary_sectors = blk_boundary_sectors(lim, is_atomic);
|
|
unsigned max_sectors, start, end;
|
|
|
|
/*
|
|
* We ignore lim->max_sectors for atomic writes because it may less
|
|
* than the actual bio size, which we cannot tolerate.
|
|
*/
|
|
if (bio_op(bio) == REQ_OP_WRITE_ZEROES)
|
|
max_sectors = lim->max_write_zeroes_sectors;
|
|
else if (is_atomic)
|
|
max_sectors = lim->atomic_write_max_sectors;
|
|
else
|
|
max_sectors = lim->max_sectors;
|
|
|
|
if (boundary_sectors) {
|
|
max_sectors = min(max_sectors,
|
|
blk_boundary_sectors_left(bio->bi_iter.bi_sector,
|
|
boundary_sectors));
|
|
}
|
|
|
|
start = bio->bi_iter.bi_sector & (pbs - 1);
|
|
end = (start + max_sectors) & ~(pbs - 1);
|
|
if (end > start)
|
|
return end - start;
|
|
return max_sectors & ~(lbs - 1);
|
|
}
|
|
|
|
/**
|
|
* bvec_split_segs - verify whether or not a bvec should be split in the middle
|
|
* @lim: [in] queue limits to split based on
|
|
* @bv: [in] bvec to examine
|
|
* @nsegs: [in,out] Number of segments in the bio being built. Incremented
|
|
* by the number of segments from @bv that may be appended to that
|
|
* bio without exceeding @max_segs
|
|
* @bytes: [in,out] Number of bytes in the bio being built. Incremented
|
|
* by the number of bytes from @bv that may be appended to that
|
|
* bio without exceeding @max_bytes
|
|
* @max_segs: [in] upper bound for *@nsegs
|
|
* @max_bytes: [in] upper bound for *@bytes
|
|
*
|
|
* When splitting a bio, it can happen that a bvec is encountered that is too
|
|
* big to fit in a single segment and hence that it has to be split in the
|
|
* middle. This function verifies whether or not that should happen. The value
|
|
* %true is returned if and only if appending the entire @bv to a bio with
|
|
* *@nsegs segments and *@sectors sectors would make that bio unacceptable for
|
|
* the block driver.
|
|
*/
|
|
static bool bvec_split_segs(const struct queue_limits *lim,
|
|
const struct bio_vec *bv, unsigned *nsegs, unsigned *bytes,
|
|
unsigned max_segs, unsigned max_bytes)
|
|
{
|
|
unsigned max_len = max_bytes - *bytes;
|
|
unsigned len = min(bv->bv_len, max_len);
|
|
unsigned total_len = 0;
|
|
unsigned seg_size = 0;
|
|
|
|
while (len && *nsegs < max_segs) {
|
|
seg_size = get_max_segment_size(lim, bvec_phys(bv) + total_len, len);
|
|
|
|
(*nsegs)++;
|
|
total_len += seg_size;
|
|
len -= seg_size;
|
|
|
|
if ((bv->bv_offset + total_len) & lim->virt_boundary_mask)
|
|
break;
|
|
}
|
|
|
|
*bytes += total_len;
|
|
|
|
/* tell the caller to split the bvec if it is too big to fit */
|
|
return len > 0 || bv->bv_len > max_len;
|
|
}
|
|
|
|
static unsigned int bio_split_alignment(struct bio *bio,
|
|
const struct queue_limits *lim)
|
|
{
|
|
if (op_is_write(bio_op(bio)) && lim->zone_write_granularity)
|
|
return lim->zone_write_granularity;
|
|
return lim->logical_block_size;
|
|
}
|
|
|
|
static inline unsigned int bvec_seg_gap(struct bio_vec *bvprv,
|
|
struct bio_vec *bv)
|
|
{
|
|
return bv->bv_offset | (bvprv->bv_offset + bvprv->bv_len);
|
|
}
|
|
|
|
/**
|
|
* bio_split_io_at - check if and where to split a bio
|
|
* @bio: [in] bio to be split
|
|
* @lim: [in] queue limits to split based on
|
|
* @segs: [out] number of segments in the bio with the first half of the sectors
|
|
* @max_bytes: [in] maximum number of bytes per bio
|
|
* @len_align_mask: [in] length alignment mask for each vector
|
|
*
|
|
* Find out if @bio needs to be split to fit the queue limits in @lim and a
|
|
* maximum size of @max_bytes. Returns a negative error number if @bio can't be
|
|
* split, 0 if the bio doesn't have to be split, or a positive sector offset if
|
|
* @bio needs to be split.
|
|
*/
|
|
int bio_split_io_at(struct bio *bio, const struct queue_limits *lim,
|
|
unsigned *segs, unsigned max_bytes, unsigned len_align_mask)
|
|
{
|
|
struct bio_crypt_ctx *bc = bio_crypt_ctx(bio);
|
|
struct bio_vec bv, bvprv, *bvprvp = NULL;
|
|
unsigned nsegs = 0, bytes = 0, gaps = 0;
|
|
struct bvec_iter iter;
|
|
unsigned start_align_mask = lim->dma_alignment;
|
|
|
|
if (bc) {
|
|
start_align_mask |= (bc->bc_key->crypto_cfg.data_unit_size - 1);
|
|
len_align_mask |= (bc->bc_key->crypto_cfg.data_unit_size - 1);
|
|
}
|
|
|
|
bio_for_each_bvec(bv, bio, iter) {
|
|
if (bv.bv_offset & start_align_mask ||
|
|
bv.bv_len & len_align_mask)
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* If the queue doesn't support SG gaps and adding this
|
|
* offset would create a gap, disallow it.
|
|
*/
|
|
if (bvprvp) {
|
|
if (bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
|
|
goto split;
|
|
gaps |= bvec_seg_gap(bvprvp, &bv);
|
|
}
|
|
|
|
if (nsegs < lim->max_segments &&
|
|
bytes + bv.bv_len <= max_bytes &&
|
|
bv.bv_offset + bv.bv_len <= lim->max_fast_segment_size) {
|
|
nsegs++;
|
|
bytes += bv.bv_len;
|
|
} else {
|
|
if (bvec_split_segs(lim, &bv, &nsegs, &bytes,
|
|
lim->max_segments, max_bytes))
|
|
goto split;
|
|
}
|
|
|
|
bvprv = bv;
|
|
bvprvp = &bvprv;
|
|
}
|
|
|
|
*segs = nsegs;
|
|
bio->bi_bvec_gap_bit = ffs(gaps);
|
|
return 0;
|
|
split:
|
|
if (bio->bi_opf & REQ_ATOMIC)
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* We can't sanely support splitting for a REQ_NOWAIT bio. End it
|
|
* with EAGAIN if splitting is required and return an error pointer.
|
|
*/
|
|
if (bio->bi_opf & REQ_NOWAIT)
|
|
return -EAGAIN;
|
|
|
|
*segs = nsegs;
|
|
|
|
/*
|
|
* Individual bvecs might not be logical block aligned. Round down the
|
|
* split size so that each bio is properly block size aligned, even if
|
|
* we do not use the full hardware limits.
|
|
*
|
|
* It is possible to submit a bio that can't be split into a valid io:
|
|
* there may either be too many discontiguous vectors for the max
|
|
* segments limit, or contain virtual boundary gaps without having a
|
|
* valid block sized split. A zero byte result means one of those
|
|
* conditions occured.
|
|
*/
|
|
bytes = ALIGN_DOWN(bytes, bio_split_alignment(bio, lim));
|
|
if (!bytes)
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* Bio splitting may cause subtle trouble such as hang when doing sync
|
|
* iopoll in direct IO routine. Given performance gain of iopoll for
|
|
* big IO can be trival, disable iopoll when split needed.
|
|
*/
|
|
bio_clear_polled(bio);
|
|
bio->bi_bvec_gap_bit = ffs(gaps);
|
|
return bytes >> SECTOR_SHIFT;
|
|
}
|
|
EXPORT_SYMBOL_GPL(bio_split_io_at);
|
|
|
|
struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
|
|
unsigned *nr_segs)
|
|
{
|
|
return bio_submit_split(bio,
|
|
bio_split_rw_at(bio, lim, nr_segs,
|
|
get_max_io_size(bio, lim) << SECTOR_SHIFT));
|
|
}
|
|
|
|
/*
|
|
* REQ_OP_ZONE_APPEND bios must never be split by the block layer.
|
|
*
|
|
* But we want the nr_segs calculation provided by bio_split_rw_at, and having
|
|
* a good sanity check that the submitter built the bio correctly is nice to
|
|
* have as well.
|
|
*/
|
|
struct bio *bio_split_zone_append(struct bio *bio,
|
|
const struct queue_limits *lim, unsigned *nr_segs)
|
|
{
|
|
int split_sectors;
|
|
|
|
split_sectors = bio_split_rw_at(bio, lim, nr_segs,
|
|
lim->max_zone_append_sectors << SECTOR_SHIFT);
|
|
if (WARN_ON_ONCE(split_sectors > 0))
|
|
split_sectors = -EINVAL;
|
|
return bio_submit_split(bio, split_sectors);
|
|
}
|
|
|
|
/*
 * Split a write zeroes @bio at get_max_io_size() sectors when it is larger
 * than that. *@nsegs is always set to 0.
 */
struct bio *bio_split_write_zeroes(struct bio *bio,
		const struct queue_limits *lim, unsigned *nsegs)
{
	unsigned int max_sectors = get_max_io_size(bio, lim);

	*nsegs = 0;

	/*
	 * An unset limit should normally not happen, as bio submission is
	 * keyed off having a non-zero limit. But SCSI can clear the limit in
	 * the I/O completion handler, and we can race and see this. Splitting
	 * to a zero limit obviously doesn't make sense, so band-aid it here
	 * together with the "already small enough" case.
	 */
	if (!max_sectors || bio_sectors(bio) <= max_sectors)
		return bio;

	return bio_submit_split(bio, max_sectors);
}
|
|
|
|
/**
|
|
* bio_split_to_limits - split a bio to fit the queue limits
|
|
* @bio: bio to be split
|
|
*
|
|
* Check if @bio needs splitting based on the queue limits of @bio->bi_bdev, and
|
|
* if so split off a bio fitting the limits from the beginning of @bio and
|
|
* return it. @bio is shortened to the remainder and re-submitted.
|
|
*
|
|
* The split bio is allocated from @q->bio_split, which is provided by the
|
|
* block layer.
|
|
*/
|
|
struct bio *bio_split_to_limits(struct bio *bio)
|
|
{
|
|
unsigned int nr_segs;
|
|
|
|
return __bio_split_to_limits(bio, bdev_limits(bio->bi_bdev), &nr_segs);
|
|
}
|
|
EXPORT_SYMBOL(bio_split_to_limits);
|
|
|
|
unsigned int blk_recalc_rq_segments(struct request *rq)
|
|
{
|
|
unsigned int nr_phys_segs = 0;
|
|
unsigned int bytes = 0;
|
|
struct req_iterator iter;
|
|
struct bio_vec bv;
|
|
|
|
if (!rq->bio)
|
|
return 0;
|
|
|
|
switch (bio_op(rq->bio)) {
|
|
case REQ_OP_DISCARD:
|
|
case REQ_OP_SECURE_ERASE:
|
|
if (queue_max_discard_segments(rq->q) > 1) {
|
|
struct bio *bio = rq->bio;
|
|
|
|
for_each_bio(bio)
|
|
nr_phys_segs++;
|
|
return nr_phys_segs;
|
|
}
|
|
return 1;
|
|
case REQ_OP_WRITE_ZEROES:
|
|
return 0;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
rq_for_each_bvec(bv, rq, iter)
|
|
bvec_split_segs(&rq->q->limits, &bv, &nr_phys_segs, &bytes,
|
|
UINT_MAX, BIO_MAX_SIZE);
|
|
return nr_phys_segs;
|
|
}
|
|
|
|
static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
|
|
sector_t offset)
|
|
{
|
|
struct request_queue *q = rq->q;
|
|
struct queue_limits *lim = &q->limits;
|
|
unsigned int max_sectors, boundary_sectors;
|
|
bool is_atomic = rq->cmd_flags & REQ_ATOMIC;
|
|
|
|
if (blk_rq_is_passthrough(rq))
|
|
return q->limits.max_hw_sectors;
|
|
|
|
boundary_sectors = blk_boundary_sectors(lim, is_atomic);
|
|
max_sectors = blk_queue_get_max_sectors(rq);
|
|
|
|
if (!boundary_sectors ||
|
|
req_op(rq) == REQ_OP_DISCARD ||
|
|
req_op(rq) == REQ_OP_SECURE_ERASE)
|
|
return max_sectors;
|
|
return min(max_sectors,
|
|
blk_boundary_sectors_left(offset, boundary_sectors));
|
|
}
|
|
|
|
static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
|
|
unsigned int nr_phys_segs)
|
|
{
|
|
if (!blk_cgroup_mergeable(req, bio))
|
|
goto no_merge;
|
|
|
|
if (blk_integrity_merge_bio(req->q, req, bio) == false)
|
|
goto no_merge;
|
|
|
|
/* discard request merge won't add new segment */
|
|
if (req_op(req) == REQ_OP_DISCARD)
|
|
return 1;
|
|
|
|
if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
|
|
goto no_merge;
|
|
|
|
/*
|
|
* This will form the start of a new hw segment. Bump both
|
|
* counters.
|
|
*/
|
|
req->nr_phys_segments += nr_phys_segs;
|
|
if (bio_integrity(bio))
|
|
req->nr_integrity_segments += blk_rq_count_integrity_sg(req->q,
|
|
bio);
|
|
return 1;
|
|
|
|
no_merge:
|
|
req_set_nomerge(req->q, req);
|
|
return 0;
|
|
}
|
|
|
|
int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
|
|
{
|
|
if (req_gap_back_merge(req, bio))
|
|
return 0;
|
|
if (blk_integrity_rq(req) &&
|
|
integrity_req_gap_back_merge(req, bio))
|
|
return 0;
|
|
if (!bio_crypt_ctx_back_mergeable(req, bio))
|
|
return 0;
|
|
if (blk_rq_sectors(req) + bio_sectors(bio) >
|
|
blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
|
|
req_set_nomerge(req->q, req);
|
|
return 0;
|
|
}
|
|
|
|
return ll_new_hw_segment(req, bio, nr_segs);
|
|
}
|
|
|
|
static int ll_front_merge_fn(struct request *req, struct bio *bio,
|
|
unsigned int nr_segs)
|
|
{
|
|
if (req_gap_front_merge(req, bio))
|
|
return 0;
|
|
if (blk_integrity_rq(req) &&
|
|
integrity_req_gap_front_merge(req, bio))
|
|
return 0;
|
|
if (!bio_crypt_ctx_front_mergeable(req, bio))
|
|
return 0;
|
|
if (blk_rq_sectors(req) + bio_sectors(bio) >
|
|
blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
|
|
req_set_nomerge(req->q, req);
|
|
return 0;
|
|
}
|
|
|
|
return ll_new_hw_segment(req, bio, nr_segs);
|
|
}
|
|
|
|
static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
|
|
struct request *next)
|
|
{
|
|
unsigned short segments = blk_rq_nr_discard_segments(req);
|
|
|
|
if (segments >= queue_max_discard_segments(q))
|
|
goto no_merge;
|
|
if (blk_rq_sectors(req) + bio_sectors(next->bio) >
|
|
blk_rq_get_max_sectors(req, blk_rq_pos(req)))
|
|
goto no_merge;
|
|
|
|
req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next);
|
|
return true;
|
|
no_merge:
|
|
req_set_nomerge(q, req);
|
|
return false;
|
|
}
|
|
|
|
static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
|
|
struct request *next)
|
|
{
|
|
int total_phys_segments;
|
|
|
|
if (req_gap_back_merge(req, next->bio))
|
|
return 0;
|
|
|
|
/*
|
|
* Will it become too large?
|
|
*/
|
|
if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
|
|
blk_rq_get_max_sectors(req, blk_rq_pos(req)))
|
|
return 0;
|
|
|
|
total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
|
|
if (total_phys_segments > blk_rq_get_max_segments(req))
|
|
return 0;
|
|
|
|
if (!blk_cgroup_mergeable(req, next->bio))
|
|
return 0;
|
|
|
|
if (blk_integrity_merge_rq(q, req, next) == false)
|
|
return 0;
|
|
|
|
if (!bio_crypt_ctx_merge_rq(req, next))
|
|
return 0;
|
|
|
|
/* Merge is OK... */
|
|
req->nr_phys_segments = total_phys_segments;
|
|
req->nr_integrity_segments += next->nr_integrity_segments;
|
|
return 1;
|
|
}
|
|
|
|
/**
|
|
* blk_rq_set_mixed_merge - mark a request as mixed merge
|
|
* @rq: request to mark as mixed merge
|
|
*
|
|
* Description:
|
|
* @rq is about to be mixed merged. Make sure the attributes
|
|
* which can be mixed are set in each bio and mark @rq as mixed
|
|
* merged.
|
|
*/
|
|
static void blk_rq_set_mixed_merge(struct request *rq)
|
|
{
|
|
blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK;
|
|
struct bio *bio;
|
|
|
|
if (rq->rq_flags & RQF_MIXED_MERGE)
|
|
return;
|
|
|
|
/*
|
|
* @rq will no longer represent mixable attributes for all the
|
|
* contained bios. It will just track those of the first one.
|
|
* Distributes the attributs to each bio.
|
|
*/
|
|
for (bio = rq->bio; bio; bio = bio->bi_next) {
|
|
WARN_ON_ONCE((bio->bi_opf & REQ_FAILFAST_MASK) &&
|
|
(bio->bi_opf & REQ_FAILFAST_MASK) != ff);
|
|
bio->bi_opf |= ff;
|
|
}
|
|
rq->rq_flags |= RQF_MIXED_MERGE;
|
|
}
|
|
|
|
static inline blk_opf_t bio_failfast(const struct bio *bio)
|
|
{
|
|
if (bio->bi_opf & REQ_RAHEAD)
|
|
return REQ_FAILFAST_MASK;
|
|
|
|
return bio->bi_opf & REQ_FAILFAST_MASK;
|
|
}
|
|
|
|
/*
|
|
* After we are marked as MIXED_MERGE, any new RA bio has to be updated
|
|
* as failfast, and request's failfast has to be updated in case of
|
|
* front merge.
|
|
*/
|
|
static inline void blk_update_mixed_merge(struct request *req,
|
|
struct bio *bio, bool front_merge)
|
|
{
|
|
if (req->rq_flags & RQF_MIXED_MERGE) {
|
|
if (bio->bi_opf & REQ_RAHEAD)
|
|
bio->bi_opf |= REQ_FAILFAST_MASK;
|
|
|
|
if (front_merge) {
|
|
req->cmd_flags &= ~REQ_FAILFAST_MASK;
|
|
req->cmd_flags |= bio->bi_opf & REQ_FAILFAST_MASK;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void blk_account_io_merge_request(struct request *req)
|
|
{
|
|
if (req->rq_flags & RQF_IO_STAT) {
|
|
part_stat_lock();
|
|
part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
|
|
part_stat_local_dec(req->part,
|
|
in_flight[op_is_write(req_op(req))]);
|
|
part_stat_unlock();
|
|
}
|
|
}
|
|
|
|
static enum elv_merge blk_try_req_merge(struct request *req,
|
|
struct request *next)
|
|
{
|
|
if (blk_discard_mergable(req))
|
|
return ELEVATOR_DISCARD_MERGE;
|
|
else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
|
|
return ELEVATOR_BACK_MERGE;
|
|
|
|
return ELEVATOR_NO_MERGE;
|
|
}
|
|
|
|
static bool blk_atomic_write_mergeable_rq_bio(struct request *rq,
|
|
struct bio *bio)
|
|
{
|
|
return (rq->cmd_flags & REQ_ATOMIC) == (bio->bi_opf & REQ_ATOMIC);
|
|
}
|
|
|
|
static bool blk_atomic_write_mergeable_rqs(struct request *rq,
|
|
struct request *next)
|
|
{
|
|
return (rq->cmd_flags & REQ_ATOMIC) == (next->cmd_flags & REQ_ATOMIC);
|
|
}
|
|
|
|
/*
 * Compute the gap bit for the combination of @prev followed by @next.
 *
 * Starting from @gaps_bit, take the smallest non-zero value among it, the
 * recorded gap bits of both bios and, when the bvecs at the junction are not
 * physically mergeable, the gap bit of that junction. Returns 0 if @prev
 * carries no data.
 */
u8 bio_seg_gap(struct request_queue *q, struct bio *prev, struct bio *next,
	       u8 gaps_bit)
{
	struct bio_vec tail, head;

	if (!bio_has_data(prev))
		return 0;

	gaps_bit = min_not_zero(gaps_bit, prev->bi_bvec_gap_bit);
	gaps_bit = min_not_zero(gaps_bit, next->bi_bvec_gap_bit);

	bio_get_last_bvec(prev, &tail);
	bio_get_first_bvec(next, &head);
	if (biovec_phys_mergeable(q, &tail, &head))
		return gaps_bit;

	gaps_bit = min_not_zero(gaps_bit, ffs(bvec_seg_gap(&tail, &head)));
	return gaps_bit;
}
|
|
|
|
/*
|
|
* For non-mq, this has to be called with the request spinlock acquired.
|
|
* For mq with scheduling, the appropriate queue wide lock should be held.
|
|
*/
|
|
static struct request *attempt_merge(struct request_queue *q,
|
|
struct request *req, struct request *next)
|
|
{
|
|
if (!rq_mergeable(req) || !rq_mergeable(next))
|
|
return NULL;
|
|
|
|
if (req_op(req) != req_op(next))
|
|
return NULL;
|
|
|
|
if (req->bio->bi_write_hint != next->bio->bi_write_hint)
|
|
return NULL;
|
|
if (req->bio->bi_write_stream != next->bio->bi_write_stream)
|
|
return NULL;
|
|
if (req->bio->bi_ioprio != next->bio->bi_ioprio)
|
|
return NULL;
|
|
if (!blk_atomic_write_mergeable_rqs(req, next))
|
|
return NULL;
|
|
|
|
/*
|
|
* If we are allowed to merge, then append bio list
|
|
* from next to rq and release next. merge_requests_fn
|
|
* will have updated segment counts, update sector
|
|
* counts here. Handle DISCARDs separately, as they
|
|
* have separate settings.
|
|
*/
|
|
|
|
switch (blk_try_req_merge(req, next)) {
|
|
case ELEVATOR_DISCARD_MERGE:
|
|
if (!req_attempt_discard_merge(q, req, next))
|
|
return NULL;
|
|
break;
|
|
case ELEVATOR_BACK_MERGE:
|
|
if (!ll_merge_requests_fn(q, req, next))
|
|
return NULL;
|
|
break;
|
|
default:
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* If failfast settings disagree or any of the two is already
|
|
* a mixed merge, mark both as mixed before proceeding. This
|
|
* makes sure that all involved bios have mixable attributes
|
|
* set properly.
|
|
*/
|
|
if (((req->rq_flags | next->rq_flags) & RQF_MIXED_MERGE) ||
|
|
(req->cmd_flags & REQ_FAILFAST_MASK) !=
|
|
(next->cmd_flags & REQ_FAILFAST_MASK)) {
|
|
blk_rq_set_mixed_merge(req);
|
|
blk_rq_set_mixed_merge(next);
|
|
}
|
|
|
|
/*
|
|
* At this point we have either done a back merge or front merge. We
|
|
* need the smaller start_time_ns of the merged requests to be the
|
|
* current request for accounting purposes.
|
|
*/
|
|
if (next->start_time_ns < req->start_time_ns)
|
|
req->start_time_ns = next->start_time_ns;
|
|
|
|
req->phys_gap_bit = bio_seg_gap(req->q, req->biotail, next->bio,
|
|
min_not_zero(next->phys_gap_bit,
|
|
req->phys_gap_bit));
|
|
req->biotail->bi_next = next->bio;
|
|
req->biotail = next->biotail;
|
|
|
|
req->__data_len += blk_rq_bytes(next);
|
|
|
|
if (!blk_discard_mergable(req))
|
|
elv_merge_requests(q, req, next);
|
|
|
|
blk_crypto_rq_put_keyslot(next);
|
|
|
|
/*
|
|
* 'next' is going away, so update stats accordingly
|
|
*/
|
|
blk_account_io_merge_request(next);
|
|
|
|
trace_block_rq_merge(next);
|
|
|
|
/*
|
|
* ownership of bio passed from next to req, return 'next' for
|
|
* the caller to free
|
|
*/
|
|
next->bio = NULL;
|
|
return next;
|
|
}
|
|
|
|
static struct request *attempt_back_merge(struct request_queue *q,
|
|
struct request *rq)
|
|
{
|
|
struct request *next = elv_latter_request(q, rq);
|
|
|
|
if (next)
|
|
return attempt_merge(q, rq, next);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static struct request *attempt_front_merge(struct request_queue *q,
|
|
struct request *rq)
|
|
{
|
|
struct request *prev = elv_former_request(q, rq);
|
|
|
|
if (prev)
|
|
return attempt_merge(q, prev, rq);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* Try to merge 'next' into 'rq'. Return true if the merge happened, false
|
|
* otherwise. The caller is responsible for freeing 'next' if the merge
|
|
* happened.
|
|
*/
|
|
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
|
|
struct request *next)
|
|
{
|
|
return attempt_merge(q, rq, next);
|
|
}
|
|
|
|
bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
|
|
{
|
|
if (!rq_mergeable(rq) || !bio_mergeable(bio))
|
|
return false;
|
|
|
|
if (req_op(rq) != bio_op(bio))
|
|
return false;
|
|
|
|
if (!blk_cgroup_mergeable(rq, bio))
|
|
return false;
|
|
if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
|
|
return false;
|
|
if (!bio_crypt_rq_ctx_compatible(rq, bio))
|
|
return false;
|
|
if (rq->bio->bi_write_hint != bio->bi_write_hint)
|
|
return false;
|
|
if (rq->bio->bi_write_stream != bio->bi_write_stream)
|
|
return false;
|
|
if (rq->bio->bi_ioprio != bio->bi_ioprio)
|
|
return false;
|
|
if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
|
|
{
|
|
if (blk_discard_mergable(rq))
|
|
return ELEVATOR_DISCARD_MERGE;
|
|
else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
|
|
return ELEVATOR_BACK_MERGE;
|
|
else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
|
|
return ELEVATOR_FRONT_MERGE;
|
|
return ELEVATOR_NO_MERGE;
|
|
}
|
|
|
|
static void blk_account_io_merge_bio(struct request *req)
|
|
{
|
|
if (req->rq_flags & RQF_IO_STAT) {
|
|
part_stat_lock();
|
|
part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
|
|
part_stat_unlock();
|
|
}
|
|
}
|
|
|
|
enum bio_merge_status bio_attempt_back_merge(struct request *req,
|
|
struct bio *bio, unsigned int nr_segs)
|
|
{
|
|
const blk_opf_t ff = bio_failfast(bio);
|
|
|
|
if (!ll_back_merge_fn(req, bio, nr_segs))
|
|
return BIO_MERGE_FAILED;
|
|
|
|
trace_block_bio_backmerge(bio);
|
|
rq_qos_merge(req->q, req, bio);
|
|
|
|
if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
|
|
blk_rq_set_mixed_merge(req);
|
|
|
|
blk_update_mixed_merge(req, bio, false);
|
|
|
|
if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
|
|
blk_zone_write_plug_bio_merged(bio);
|
|
|
|
req->phys_gap_bit = bio_seg_gap(req->q, req->biotail, bio,
|
|
req->phys_gap_bit);
|
|
req->biotail->bi_next = bio;
|
|
req->biotail = bio;
|
|
req->__data_len += bio->bi_iter.bi_size;
|
|
|
|
bio_crypt_free_ctx(bio);
|
|
|
|
blk_account_io_merge_bio(req);
|
|
return BIO_MERGE_OK;
|
|
}
|
|
|
|
static enum bio_merge_status bio_attempt_front_merge(struct request *req,
|
|
struct bio *bio, unsigned int nr_segs)
|
|
{
|
|
const blk_opf_t ff = bio_failfast(bio);
|
|
|
|
/*
|
|
* A front merge for writes to sequential zones of a zoned block device
|
|
* can happen only if the user submitted writes out of order. Do not
|
|
* merge such write to let it fail.
|
|
*/
|
|
if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
|
|
return BIO_MERGE_FAILED;
|
|
|
|
if (!ll_front_merge_fn(req, bio, nr_segs))
|
|
return BIO_MERGE_FAILED;
|
|
|
|
trace_block_bio_frontmerge(bio);
|
|
rq_qos_merge(req->q, req, bio);
|
|
|
|
if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
|
|
blk_rq_set_mixed_merge(req);
|
|
|
|
blk_update_mixed_merge(req, bio, true);
|
|
|
|
req->phys_gap_bit = bio_seg_gap(req->q, bio, req->bio,
|
|
req->phys_gap_bit);
|
|
bio->bi_next = req->bio;
|
|
req->bio = bio;
|
|
|
|
req->__sector = bio->bi_iter.bi_sector;
|
|
req->__data_len += bio->bi_iter.bi_size;
|
|
|
|
bio_crypt_do_front_merge(req, bio);
|
|
|
|
blk_account_io_merge_bio(req);
|
|
return BIO_MERGE_OK;
|
|
}
|
|
|
|
static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q,
|
|
struct request *req, struct bio *bio)
|
|
{
|
|
unsigned short segments = blk_rq_nr_discard_segments(req);
|
|
|
|
if (segments >= queue_max_discard_segments(q))
|
|
goto no_merge;
|
|
if (blk_rq_sectors(req) + bio_sectors(bio) >
|
|
blk_rq_get_max_sectors(req, blk_rq_pos(req)))
|
|
goto no_merge;
|
|
|
|
rq_qos_merge(q, req, bio);
|
|
|
|
req->biotail->bi_next = bio;
|
|
req->biotail = bio;
|
|
req->__data_len += bio->bi_iter.bi_size;
|
|
req->nr_phys_segments = segments + 1;
|
|
|
|
blk_account_io_merge_bio(req);
|
|
return BIO_MERGE_OK;
|
|
no_merge:
|
|
req_set_nomerge(q, req);
|
|
return BIO_MERGE_FAILED;
|
|
}
|
|
|
|
static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
|
|
struct request *rq,
|
|
struct bio *bio,
|
|
unsigned int nr_segs,
|
|
bool sched_allow_merge)
|
|
{
|
|
if (!blk_rq_merge_ok(rq, bio))
|
|
return BIO_MERGE_NONE;
|
|
|
|
switch (blk_try_merge(rq, bio)) {
|
|
case ELEVATOR_BACK_MERGE:
|
|
if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
|
|
return bio_attempt_back_merge(rq, bio, nr_segs);
|
|
break;
|
|
case ELEVATOR_FRONT_MERGE:
|
|
if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
|
|
return bio_attempt_front_merge(rq, bio, nr_segs);
|
|
break;
|
|
case ELEVATOR_DISCARD_MERGE:
|
|
return bio_attempt_discard_merge(q, rq, bio);
|
|
default:
|
|
return BIO_MERGE_NONE;
|
|
}
|
|
|
|
return BIO_MERGE_FAILED;
|
|
}
|
|
|
|
/**
|
|
* blk_attempt_plug_merge - try to merge with %current's plugged list
|
|
* @q: request_queue new bio is being queued at
|
|
* @bio: new bio being queued
|
|
* @nr_segs: number of segments in @bio
|
|
* from the passed in @q already in the plug list
|
|
*
|
|
* Determine whether @bio being queued on @q can be merged with the previous
|
|
* request on %current's plugged list. Returns %true if merge was successful,
|
|
* otherwise %false.
|
|
*
|
|
* Plugging coalesces IOs from the same issuer for the same purpose without
|
|
* going through @q->queue_lock. As such it's more of an issuing mechanism
|
|
* than scheduling, and the request, while may have elvpriv data, is not
|
|
* added on the elevator at this point. In addition, we don't have
|
|
* reliable access to the elevator outside queue lock. Only check basic
|
|
* merging parameters without querying the elevator.
|
|
*
|
|
* Caller must ensure !blk_queue_nomerges(q) beforehand.
|
|
*/
|
|
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
|
|
unsigned int nr_segs)
|
|
{
|
|
struct blk_plug *plug = current->plug;
|
|
struct request *rq;
|
|
|
|
if (!plug || rq_list_empty(&plug->mq_list))
|
|
return false;
|
|
|
|
rq = plug->mq_list.tail;
|
|
if (rq->q == q)
|
|
return blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
|
|
BIO_MERGE_OK;
|
|
else if (!plug->multiple_queues)
|
|
return false;
|
|
|
|
rq_list_for_each(&plug->mq_list, rq) {
|
|
if (rq->q != q)
|
|
continue;
|
|
if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
|
|
BIO_MERGE_OK)
|
|
return true;
|
|
break;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Iterate list of requests and see if we can merge this bio with any
|
|
* of them.
|
|
*/
|
|
bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
|
|
struct bio *bio, unsigned int nr_segs)
|
|
{
|
|
struct request *rq;
|
|
int checked = 8;
|
|
|
|
list_for_each_entry_reverse(rq, list, queuelist) {
|
|
if (!checked--)
|
|
break;
|
|
|
|
switch (blk_attempt_bio_merge(q, rq, bio, nr_segs, true)) {
|
|
case BIO_MERGE_NONE:
|
|
continue;
|
|
case BIO_MERGE_OK:
|
|
return true;
|
|
case BIO_MERGE_FAILED:
|
|
return false;
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
}
|
|
EXPORT_SYMBOL_GPL(blk_bio_list_merge);
|
|
|
|
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
|
|
unsigned int nr_segs, struct request **merged_request)
|
|
{
|
|
struct request *rq;
|
|
|
|
switch (elv_merge(q, &rq, bio)) {
|
|
case ELEVATOR_BACK_MERGE:
|
|
if (!blk_mq_sched_allow_merge(q, rq, bio))
|
|
return false;
|
|
if (bio_attempt_back_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
|
|
return false;
|
|
*merged_request = attempt_back_merge(q, rq);
|
|
if (!*merged_request)
|
|
elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
|
|
return true;
|
|
case ELEVATOR_FRONT_MERGE:
|
|
if (!blk_mq_sched_allow_merge(q, rq, bio))
|
|
return false;
|
|
if (bio_attempt_front_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
|
|
return false;
|
|
*merged_request = attempt_front_merge(q, rq);
|
|
if (!*merged_request)
|
|
elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
|
|
return true;
|
|
case ELEVATOR_DISCARD_MERGE:
|
|
return bio_attempt_discard_merge(q, rq, bio) == BIO_MERGE_OK;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
|