bdev/rpc: Fix race condition for per channel bdev_get_iostat
With per channel stats called on thread A, spdk_bdev_for_each_channel calls spdk_for_each_channel which immediately sends a message to thread B. If thread B has no workload, it may execute the message relatively fast, trying to write stats to json_write_ctx. As a result, we may have 2 scenarios: 1. json_write_ctx is still not initialized on thread A, so thread B dereferences a NULL pointer. 2. json_write_ctx is initialized but thread A writes the response header while thread B writes stats - it leads to a corrupted JSON response. To fix this race condition, initialize json_write_ctx before iterating bdevs/channels. Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com> Signed-off-by: Alexey Marchuk <alexeymar@nvidia.com> Reported-by: Or Gerlitz <ogerlitz@nvidia.com> Change-Id: I5dae37f1f527437528fc8a8e9c6066f69687dec9 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/16366 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
parent
db3869d1b2
commit
139940d6c1
@ -175,7 +175,7 @@ struct bdev_get_iostat_ctx {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static void
|
static void
|
||||||
_rpc_get_iostat_started(struct rpc_get_iostat_ctx *rpc_ctx)
|
rpc_get_iostat_started(struct rpc_get_iostat_ctx *rpc_ctx)
|
||||||
{
|
{
|
||||||
rpc_ctx->w = spdk_jsonrpc_begin_result(rpc_ctx->request);
|
rpc_ctx->w = spdk_jsonrpc_begin_result(rpc_ctx->request);
|
||||||
|
|
||||||
@ -184,23 +184,6 @@ _rpc_get_iostat_started(struct rpc_get_iostat_ctx *rpc_ctx)
|
|||||||
spdk_json_write_named_uint64(rpc_ctx->w, "ticks", spdk_get_ticks());
|
spdk_json_write_named_uint64(rpc_ctx->w, "ticks", spdk_get_ticks());
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
rpc_get_iostat_started(struct rpc_get_iostat_ctx *rpc_ctx, struct spdk_bdev_desc *desc)
|
|
||||||
{
|
|
||||||
struct spdk_bdev *bdev;
|
|
||||||
|
|
||||||
_rpc_get_iostat_started(rpc_ctx);
|
|
||||||
|
|
||||||
if (rpc_ctx->per_channel == false) {
|
|
||||||
spdk_json_write_named_array_begin(rpc_ctx->w, "bdevs");
|
|
||||||
} else {
|
|
||||||
bdev = spdk_bdev_desc_get_bdev(desc);
|
|
||||||
|
|
||||||
spdk_json_write_named_string(rpc_ctx->w, "name", spdk_bdev_get_name(bdev));
|
|
||||||
spdk_json_write_named_array_begin(rpc_ctx->w, "channels");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
rpc_get_iostat_done(struct rpc_get_iostat_ctx *rpc_ctx)
|
rpc_get_iostat_done(struct rpc_get_iostat_ctx *rpc_ctx)
|
||||||
{
|
{
|
||||||
@ -380,6 +363,7 @@ rpc_bdev_get_iostat(struct spdk_jsonrpc_request *request,
|
|||||||
struct spdk_bdev_desc *desc = NULL;
|
struct spdk_bdev_desc *desc = NULL;
|
||||||
struct rpc_get_iostat_ctx *rpc_ctx;
|
struct rpc_get_iostat_ctx *rpc_ctx;
|
||||||
struct bdev_get_iostat_ctx *bdev_ctx;
|
struct bdev_get_iostat_ctx *bdev_ctx;
|
||||||
|
struct spdk_bdev *bdev;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
if (params != NULL) {
|
if (params != NULL) {
|
||||||
@ -429,6 +413,8 @@ rpc_bdev_get_iostat(struct spdk_jsonrpc_request *request,
|
|||||||
rpc_ctx->per_channel = req.per_channel;
|
rpc_ctx->per_channel = req.per_channel;
|
||||||
|
|
||||||
if (desc != NULL) {
|
if (desc != NULL) {
|
||||||
|
bdev = spdk_bdev_desc_get_bdev(desc);
|
||||||
|
|
||||||
bdev_ctx = bdev_iostat_ctx_alloc(req.per_channel == false);
|
bdev_ctx = bdev_iostat_ctx_alloc(req.per_channel == false);
|
||||||
if (bdev_ctx == NULL) {
|
if (bdev_ctx == NULL) {
|
||||||
SPDK_ERRLOG("Failed to allocate bdev_iostat_ctx struct\n");
|
SPDK_ERRLOG("Failed to allocate bdev_iostat_ctx struct\n");
|
||||||
@ -441,13 +427,24 @@ rpc_bdev_get_iostat(struct spdk_jsonrpc_request *request,
|
|||||||
rpc_ctx->bdev_count++;
|
rpc_ctx->bdev_count++;
|
||||||
bdev_ctx->rpc_ctx = rpc_ctx;
|
bdev_ctx->rpc_ctx = rpc_ctx;
|
||||||
if (req.per_channel == false) {
|
if (req.per_channel == false) {
|
||||||
spdk_bdev_get_device_stat(spdk_bdev_desc_get_bdev(desc), bdev_ctx->stat,
|
spdk_bdev_get_device_stat(bdev, bdev_ctx->stat, bdev_get_iostat_done,
|
||||||
bdev_get_iostat_done, bdev_ctx);
|
bdev_ctx);
|
||||||
} else {
|
} else {
|
||||||
spdk_bdev_for_each_channel(spdk_bdev_desc_get_bdev(desc),
|
/* If per_channel is true, there is no failure after here and
|
||||||
|
* we have to start RPC response before executing
|
||||||
|
* spdk_bdev_for_each_channel().
|
||||||
|
*/
|
||||||
|
rpc_get_iostat_started(rpc_ctx);
|
||||||
|
spdk_json_write_named_string(rpc_ctx->w, "name", spdk_bdev_get_name(bdev));
|
||||||
|
spdk_json_write_named_array_begin(rpc_ctx->w, "channels");
|
||||||
|
|
||||||
|
spdk_bdev_for_each_channel(bdev,
|
||||||
bdev_get_per_channel_stat,
|
bdev_get_per_channel_stat,
|
||||||
bdev_ctx,
|
bdev_ctx,
|
||||||
bdev_get_per_channel_stat_done);
|
bdev_get_per_channel_stat_done);
|
||||||
|
|
||||||
|
rpc_get_iostat_done(rpc_ctx);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -458,12 +455,12 @@ rpc_bdev_get_iostat(struct spdk_jsonrpc_request *request,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (rpc_ctx->rc == 0) {
|
if (rpc_ctx->rc == 0) {
|
||||||
/* We want to fail the RPC for all failures. The callback function to
|
/* We want to fail the RPC for all failures. If per_channel is false,
|
||||||
* spdk_bdev_for_each_channel() is executed after stack unwinding if
|
* it is enough to defer starting RPC response until it is ensured that
|
||||||
* successful. Hence defer starting RPC response until it is ensured that
|
|
||||||
* all spdk_bdev_for_each_channel() calls will succeed or there is no bdev.
|
* all spdk_bdev_for_each_channel() calls will succeed or there is no bdev.
|
||||||
*/
|
*/
|
||||||
rpc_get_iostat_started(rpc_ctx, desc);
|
rpc_get_iostat_started(rpc_ctx);
|
||||||
|
spdk_json_write_named_array_begin(rpc_ctx->w, "bdevs");
|
||||||
}
|
}
|
||||||
|
|
||||||
rpc_get_iostat_done(rpc_ctx);
|
rpc_get_iostat_done(rpc_ctx);
|
||||||
|
Loading…
Reference in New Issue
Block a user