bdev_nvme: fix heap-use-after-free when detaching controller

There is heap-use-after-free error when detaching a controller
when "io_path_stat" option set as true.
(if build spdk without asan ubsan, error is free(): corrupted
unsorted chunks)

It's because io_path is accessed in bdev_nvme_io_complete_nvme_status
after the io_path is freed.

io_path is freed when we detach the controller in function
_bdev_nvme_delete_io_path, this function will execute 1 and 2.
And before 4 is executed, 3 may be executed which accesses io_path.

1.spdk_put_io_channel() is called. bdev_nvme_destroy_ctrlr_channel_cb
has not been called.
2.free(io_path->stat); free(io_path);
3.bdev_nvme_poll; nbdev_io1 is success; bdev_nvme_io_complete_nvme_status()
access nbdev_io1->io_path.
4.bdev_nvme_destroy_ctrlr_channel_cb disconnect qpair and abort nbdev_io1.

This patch fixed this by moving 2 down under 4. We don't free io_path in
_bdev_nvme_delete_io_path but just remove from the nbdev_ch->io_path_list.

The processes to reproduce the error:
target: run nvmf_tgt
initiator: (build spdk with asan,ubsan enabled)
sudo ./build/examples/bdevperf --json bdevperf-multipath-rdma-active-active.json  -r tmp.sock -q 128 -o 4096  -w randrw -M 50 -t 120
sudo ./scripts/rpc.py -s tmp.sock  bdev_nvme_detach_controller -t rdma -a 10.10.10.10 -f IPv4 -s 4420 -n nqn.2016-06.io.spdk:cnode1 NVMe0

========
bdevperf-multipath-rdma-active-active.json

{
  "subsystems": [
  {
    "subsystem": "bdev",
    "config": [
       {
         "method":"bdev_nvme_attach_controller",
         "params": {
           "name": "NVMe0",
           "trtype": "tcp",
           "traddr": "10.169.204.201",
           "trsvcid": "4420",
           "subnqn": "nqn.2016-06.io.spdk:cnode1",
           "hostnqn": "nqn.2016-06.io.spdk:init",
           "adrfam": "IPv4"
        }
      },
      {
        "method":"bdev_nvme_attach_controller",
        "params": {
        "name": "NVMe0",
        "trtype": "rdma",
         "traddr": "10.10.10.10",
           "trsvcid": "4420",
           "subnqn": "nqn.2016-06.io.spdk:cnode1",
           "hostnqn": "nqn.2016-06.io.spdk:init",
           "adrfam": "IPv4",
           "multipath": "multipath"
        }
    },
    {
       "method":"bdev_nvme_set_multipath_policy",
       "params": {
         "name": "NVMe0n1",
         "policy": "active_active"
       }
    },
    {
       "method":"bdev_nvme_set_options",
         "params": {
           "io_path_stat": true
         }
    }
    ]
    }
  ]
}
======

Change-Id: I8f4f9dc7195f49992a5ba9798613b64d44266e5e
Signed-off-by: Richael Zhuang <richael.zhuang@arm.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/17581
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
This commit is contained in:
Richael Zhuang 2023-04-12 16:26:27 +08:00 committed by Konrad Sztyber
parent e351b19055
commit 953b74b9b0

View File

@ -582,6 +582,37 @@ _bdev_nvme_get_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_
return io_path;
}
static struct nvme_io_path *
nvme_io_path_alloc(void)
{
struct nvme_io_path *io_path;
io_path = calloc(1, sizeof(*io_path));
if (io_path == NULL) {
SPDK_ERRLOG("Failed to alloc io_path.\n");
return NULL;
}
if (g_opts.io_path_stat) {
io_path->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
if (io_path->stat == NULL) {
free(io_path);
SPDK_ERRLOG("Failed to alloc io_path stat.\n");
return NULL;
}
spdk_bdev_reset_io_stat(io_path->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
}
return io_path;
}
static void
nvme_io_path_free(struct nvme_io_path *io_path)
{
free(io_path->stat);
free(io_path);
}
static int
_bdev_nvme_add_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
{
@ -590,28 +621,16 @@ _bdev_nvme_add_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_
struct nvme_ctrlr_channel *ctrlr_ch;
struct nvme_qpair *nvme_qpair;
io_path = calloc(1, sizeof(*io_path));
io_path = nvme_io_path_alloc();
if (io_path == NULL) {
SPDK_ERRLOG("Failed to alloc io_path.\n");
return -ENOMEM;
}
if (g_opts.io_path_stat) {
io_path->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
if (io_path->stat == NULL) {
free(io_path);
SPDK_ERRLOG("Failed to alloc io_path stat.\n");
return -ENOMEM;
}
spdk_bdev_reset_io_stat(io_path->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
}
io_path->nvme_ns = nvme_ns;
ch = spdk_get_io_channel(nvme_ns->ctrlr);
if (ch == NULL) {
free(io_path->stat);
free(io_path);
nvme_io_path_free(io_path);
SPDK_ERRLOG("Failed to alloc io_channel.\n");
return -ENOMEM;
}
@ -652,20 +671,22 @@ _bdev_nvme_delete_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_pat
bdev_nvme_clear_current_io_path(nbdev_ch);
STAILQ_REMOVE(&nbdev_ch->io_path_list, io_path, nvme_io_path, stailq);
io_path->nbdev_ch = NULL;
nvme_qpair = io_path->qpair;
assert(nvme_qpair != NULL);
TAILQ_REMOVE(&nvme_qpair->io_path_list, io_path, tailq);
ctrlr_ch = nvme_qpair->ctrlr_ch;
assert(ctrlr_ch != NULL);
ch = spdk_io_channel_from_ctx(ctrlr_ch);
spdk_put_io_channel(ch);
free(io_path->stat);
free(io_path);
/* After an io_path is removed, I/Os submitted to it may complete and update statistics
* of the io_path. To avoid heap-use-after-free error from this case, do not free the
* io_path here but free the io_path when the associated qpair is freed. It is ensured
* that all I/Os submitted to the io_path are completed when the associated qpair is freed.
*/
}
static void
@ -1401,6 +1422,9 @@ _bdev_nvme_clear_io_path_cache(struct nvme_qpair *nvme_qpair)
struct nvme_io_path *io_path;
TAILQ_FOREACH(io_path, &nvme_qpair->io_path_list, tailq) {
if (io_path->nbdev_ch == NULL) {
continue;
}
bdev_nvme_clear_current_io_path(io_path->nbdev_ch);
}
}
@ -2624,8 +2648,15 @@ bdev_nvme_create_ctrlr_channel_cb(void *io_device, void *ctx_buf)
static void
nvme_qpair_delete(struct nvme_qpair *nvme_qpair)
{
struct nvme_io_path *io_path, *next;
assert(nvme_qpair->group != NULL);
TAILQ_FOREACH_SAFE(io_path, &nvme_qpair->io_path_list, tailq, next) {
TAILQ_REMOVE(&nvme_qpair->io_path_list, io_path, tailq);
nvme_io_path_free(io_path);
}
TAILQ_REMOVE(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
spdk_put_io_channel(spdk_io_channel_from_ctx(nvme_qpair->group));
@ -7249,7 +7280,8 @@ nvme_io_path_info_json(struct spdk_json_write_ctx *w, struct nvme_io_path *io_pa
trid = spdk_nvme_ctrlr_get_transport_id(nvme_ctrlr->ctrlr);
spdk_json_write_named_uint32(w, "cntlid", cdata->cntlid);
spdk_json_write_named_bool(w, "current", io_path == io_path->nbdev_ch->current_io_path);
spdk_json_write_named_bool(w, "current", io_path->nbdev_ch != NULL &&
io_path == io_path->nbdev_ch->current_io_path);
spdk_json_write_named_bool(w, "connected", nvme_io_path_is_connected(io_path));
spdk_json_write_named_bool(w, "accessible", nvme_ns_is_accessible(nvme_ns));