Merge tag 'block-5.16-2021-12-10' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
"A few block fixes that should go into this release:
- NVMe pull request:
- set ana_log_size to 0 after freeing ana_log_buf (Hou Tao)
- show subsys nqn for duplicate cntlids (Keith Busch)
- disable namespace access for unsupported metadata (Keith
Busch)
- report write pointer for a full zone as zone start + zone len
(Niklas Cassel)
- fix use after free when disconnecting a reconnecting ctrl
(Ruozhu Li)
- fix a list corruption in nvmet-tcp (Sagi Grimberg)
- Fix for a regression on DIO single bio async IO (Pavel)
- ioprio seteuid fix (Davidlohr)
- mtd fix that subsequently got reverted as it was broken, will get
re-done and submitted for the next round
- Two MD fixes via Song (Markus, zhangyue)"
* tag 'block-5.16-2021-12-10' of git://git.kernel.dk/linux-block:
Revert "mtd_blkdevs: don't scan partitions for plain mtdblock"
block: fix ioprio_get(IOPRIO_WHO_PGRP) vs setuid(2)
md: fix double free of mddev->private in autorun_array()
md: fix update super 1.0 on rdev size change
nvmet-tcp: fix possible list corruption for unexpected command failure
block: fix single bio async DIO error handling
nvme: fix use after free when disconnecting a reconnecting ctrl
nvme-multipath: set ana_log_size to 0 after free ana_log_buf
mtd_blkdevs: don't scan partitions for plain mtdblock
nvme: report write pointer for a full zone as zone start + zone len
nvme: disable namespace access for unsupported metadata
nvme: show subsys nqn for duplicate cntlids
diff --git a/block/fops.c b/block/fops.c
index 3cb1e81..0da147e 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -341,8 +341,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
} else {
ret = bio_iov_iter_get_pages(bio, iter);
if (unlikely(ret)) {
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
+ bio_put(bio);
return ret;
}
}
diff --git a/block/ioprio.c b/block/ioprio.c
index 313c14a..6f01d35 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -220,6 +220,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
pgrp = task_pgrp(current);
else
pgrp = find_vpid(who);
+ read_lock(&tasklist_lock);
do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
tmpio = get_task_ioprio(p);
if (tmpio < 0)
@@ -229,6 +230,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
else
ret = ioprio_best(ret, tmpio);
} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
+ read_unlock(&tasklist_lock);
+
break;
case IOPRIO_WHO_USER:
uid = make_kuid(current_user_ns(), who);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5111ed9..41d6e23 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2189,6 +2189,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
if (!num_sectors || num_sectors > max_sectors)
num_sectors = max_sectors;
+ rdev->sb_start = sb_start;
}
sb = page_address(rdev->sb_page);
sb->data_size = cpu_to_le64(num_sectors);
@@ -6270,7 +6271,8 @@ static void __md_stop(struct mddev *mddev)
spin_lock(&mddev->lock);
mddev->pers = NULL;
spin_unlock(&mddev->lock);
- pers->free(mddev, mddev->private);
+ if (mddev->private)
+ pers->free(mddev, mddev->private);
mddev->private = NULL;
if (pers->sync_request && mddev->to_remove == NULL)
mddev->to_remove = &md_redundancy_group;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 4c63564..1af8a45 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -666,6 +666,7 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
struct request *rq)
{
if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
+ ctrl->state != NVME_CTRL_DELETING &&
ctrl->state != NVME_CTRL_DEAD &&
!test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
@@ -1749,9 +1750,20 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
*/
if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT)))
return -EINVAL;
- if (ctrl->max_integrity_segments)
- ns->features |=
- (NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS);
+
+ ns->features |= NVME_NS_EXT_LBAS;
+
+ /*
+ * The current fabrics transport drivers support namespace
+ * metadata formats only if nvme_ns_has_pi() returns true.
+ * Suppress support for all other formats so the namespace will
+ * have a 0 capacity and not be usable through the block stack.
+ *
+ * Note, this check will need to be modified if any drivers
+ * gain the ability to use other metadata formats.
+ */
+ if (ctrl->max_integrity_segments && nvme_ns_has_pi(ns))
+ ns->features |= NVME_NS_METADATA_SUPPORTED;
} else {
/*
* For PCIe controllers, we can't easily remap the separate
@@ -2696,8 +2708,9 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
if (tmp->cntlid == ctrl->cntlid) {
dev_err(ctrl->device,
- "Duplicate cntlid %u with %s, rejecting\n",
- ctrl->cntlid, dev_name(tmp->device));
+ "Duplicate cntlid %u with %s, subsys %s, rejecting\n",
+ ctrl->cntlid, dev_name(tmp->device),
+ subsys->subnqn);
return false;
}
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 7f2071f..13e5d50 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -866,7 +866,7 @@ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
}
if (ana_log_size > ctrl->ana_log_size) {
nvme_mpath_stop(ctrl);
- kfree(ctrl->ana_log_buf);
+ nvme_mpath_uninit(ctrl);
ctrl->ana_log_buf = kmalloc(ana_log_size, GFP_KERNEL);
if (!ctrl->ana_log_buf)
return -ENOMEM;
@@ -886,4 +886,5 @@ void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
{
kfree(ctrl->ana_log_buf);
ctrl->ana_log_buf = NULL;
+ ctrl->ana_log_size = 0;
}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index b334af8..9b095ee0 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -709,7 +709,7 @@ static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
return true;
if (ctrl->ops->flags & NVME_F_FABRICS &&
ctrl->state == NVME_CTRL_DELETING)
- return true;
+ return queue_live;
return __nvme_check_ready(ctrl, rq, queue_live);
}
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index bfc259e..9f81beb 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -166,7 +166,10 @@ static int nvme_zone_parse_entry(struct nvme_ns *ns,
zone.len = ns->zsze;
zone.capacity = nvme_lba_to_sect(ns, le64_to_cpu(entry->zcap));
zone.start = nvme_lba_to_sect(ns, le64_to_cpu(entry->zslba));
- zone.wp = nvme_lba_to_sect(ns, le64_to_cpu(entry->wp));
+ if (zone.cond == BLK_ZONE_COND_FULL)
+ zone.wp = zone.start + zone.len;
+ else
+ zone.wp = nvme_lba_to_sect(ns, le64_to_cpu(entry->wp));
return cb(&zone, idx, data);
}
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index cb6a473..7c1c43c 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -922,7 +922,14 @@ static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
size_t data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length);
int ret;
- if (!nvme_is_write(cmd->req.cmd) ||
+ /*
+ * This command has not been processed yet, hence we are trying to
+ * figure out if there is still pending data left to receive. If
+ * we don't, we can simply prepare for the next pdu and bail out,
+ * otherwise we will need to prepare a buffer and receive the
+ * stale data before continuing forward.
+ */
+ if (!nvme_is_write(cmd->req.cmd) || !data_len ||
data_len > cmd->req.port->inline_data_size) {
nvmet_prepare_receive_pdu(queue);
return;