nvmf/vfio_user: mitigate cq full race
The Linux host nvme driver processes all pending CQEs in one batch, completing the backing blk_mq requests as it goes, and rings the CQ doorbell only once for the whole batch. Because blk_mq requests are completed along the way, there is room for new submissions before the doorbell is rung. This can race with vfio_user's cq_is_full(), which relies on the CQ doorbell for its final decision: since the host has not yet updated the doorbell, we fail with a spurious cq_full error.

To mitigate this, only consume commands from the SQ when a free CQ slot is available for their completion.

Signed-off-by: Swapnil Ingle <swapnil.ingle@nutanix.com>
Change-Id: I0cefb41df8099eb71de25923d05a9fcb28e4d124
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/16788
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
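To make the race concrete, the standalone sketch below (not SPDK code; the ring struct, field names, and the toy_cq_is_full() body are simplified assumptions) shows how a full check that trusts only the doorbell head can report a spurious full while the host is still batching its doorbell write:

#include <stdint.h>
#include <stdio.h>

struct toy_cq {
	uint32_t size;     /* number of slots in the completion ring */
	uint32_t tail;     /* target-side producer index             */
	uint32_t dbl_head; /* consumer index, as last written by the
	                    * host via the CQ head doorbell          */
};

/* Doorbell-only full check, the shape of the pre-fix logic. */
static int
toy_cq_is_full(const struct toy_cq *cq)
{
	return (cq->tail + 1) % cq->size == cq->dbl_head;
}

int
main(void)
{
	struct toy_cq cq = { .size = 4, .tail = 3, .dbl_head = 0 };

	/* The host has already consumed entries 0-2 and reused the freed
	 * blk_mq tags for new submissions, but its doorbell write is
	 * batched, so dbl_head still reads 0. */
	printf("full=%d (stale doorbell)\n", toy_cq_is_full(&cq));   /* 1 */

	cq.dbl_head = 3; /* the batched doorbell write finally lands */
	printf("full=%d (doorbell updated)\n", toy_cq_is_full(&cq)); /* 0 */
	return 0;
}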
commit 23b518a013 (parent 8613654074)
@@ -2507,13 +2507,34 @@ consume_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvmf_vfio_user_sq *sq,
 	return handle_cmd_req(ctrlr, cmd, sq);
 }
 
+static uint32_t
+cq_free_slots(struct nvmf_vfio_user_cq *cq)
+{
+	uint32_t free_slots;
+
+	assert(cq != NULL);
+
+	if (cq->tail == cq->last_head) {
+		free_slots = cq->size;
+	} else if (cq->tail > cq->last_head) {
+		free_slots = cq->size - (cq->tail - cq->last_head);
+	} else {
+		free_slots = cq->last_head - cq->tail;
+	}
+	assert(free_slots > 0);
+
+	return free_slots - 1;
+}
+
 /* Returns the number of commands processed, or a negative value on error. */
 static int
 handle_sq_tdbl_write(struct nvmf_vfio_user_ctrlr *ctrlr, const uint32_t new_tail,
 		     struct nvmf_vfio_user_sq *sq)
 {
 	struct spdk_nvme_cmd *queue;
+	struct nvmf_vfio_user_cq *cq = ctrlr->cqs[sq->cqid];
 	int count = 0;
+	uint32_t free_cq_slots;
 
 	assert(ctrlr != NULL);
 	assert(sq != NULL);
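The new helper's ring arithmetic can be sanity-checked in isolation. The sketch below mirrors cq_free_slots() on a simplified struct (keeping only the three fields the diff actually touches) and exercises the empty, non-wrapped, and wrapped cases:

#include <assert.h>
#include <stdint.h>

struct mini_cq {
	uint32_t size;      /* ring size                    */
	uint32_t tail;      /* producer index               */
	uint32_t last_head; /* cached consumer (head) index */
};

/* Same arithmetic as the new cq_free_slots() helper above. */
static uint32_t
mini_cq_free_slots(const struct mini_cq *cq)
{
	uint32_t free_slots;

	if (cq->tail == cq->last_head) {
		free_slots = cq->size;                 /* ring empty   */
	} else if (cq->tail > cq->last_head) {
		free_slots = cq->size - (cq->tail - cq->last_head);
	} else {
		free_slots = cq->last_head - cq->tail; /* tail wrapped */
	}
	assert(free_slots > 0);

	/* One slot stays reserved so tail == head always means empty. */
	return free_slots - 1;
}

int
main(void)
{
	struct mini_cq cq = { .size = 8, .tail = 0, .last_head = 0 };

	assert(mini_cq_free_slots(&cq) == 7); /* empty ring          */
	cq.tail = 5;
	assert(mini_cq_free_slots(&cq) == 2); /* 5 CQEs outstanding  */
	cq.tail = 2;
	cq.last_head = 6;
	assert(mini_cq_free_slots(&cq) == 3); /* tail wrapped around */
	return 0;
}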
@@ -2526,11 +2547,38 @@ handle_sq_tdbl_write(struct nvmf_vfio_user_ctrlr *ctrlr, const uint32_t new_tail
 		sq->need_rearm = true;
 	}
 
+	free_cq_slots = cq_free_slots(cq);
 	queue = q_addr(&sq->mapping);
 	while (*sq_headp(sq) != new_tail) {
 		int err;
-		struct spdk_nvme_cmd *cmd = &queue[*sq_headp(sq)];
+		struct spdk_nvme_cmd *cmd;
+
+		/*
+		 * The Linux host nvme driver can submit more cmds than there
+		 * are free cq slots available, so only process cmds that have
+		 * a free cq slot.
+		 */
+		if (free_cq_slots-- == 0) {
+			cq->last_head = *cq_dbl_headp(cq);
+
+			free_cq_slots = cq_free_slots(cq);
+			if (free_cq_slots > 0) {
+				continue;
+			}
+
+			/*
+			 * If there are no free cq slots then kick interrupt FD to loop
+			 * again to process remaining sq cmds.
+			 * In case of polling mode we will process remaining sq cmds during
+			 * next polling iteration.
+			 * sq head is advanced only for consumed commands.
+			 */
+			if (in_interrupt_mode(ctrlr->transport)) {
+				eventfd_write(ctrlr->intr_fd, 1);
+			}
+			break;
+		}
+
+		cmd = &queue[*sq_headp(sq)];
 		count++;
 
 		/*
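When no CQ slot frees up even after re-reading the doorbell, the loop stops consuming and, in interrupt mode, kicks its own interrupt FD so the remaining SQ entries are retried on the next wakeup (in polling mode the next poll picks them up, since the SQ head advances only for consumed commands). The self-kick itself is plain eventfd usage; a minimal sketch, independent of SPDK's reactor:

#include <stdio.h>
#include <sys/eventfd.h>
#include <unistd.h>

int
main(void)
{
	/* Stand-in for ctrlr->intr_fd. */
	int fd = eventfd(0, EFD_NONBLOCK);
	eventfd_t val;

	/* Producer side: leave a wakeup pending for the event loop,
	 * as eventfd_write(ctrlr->intr_fd, 1) does in the diff above. */
	eventfd_write(fd, 1);

	/* Event-loop side: draining the fd observes the kick; SPDK would
	 * then call back into the SQ processing path. */
	if (eventfd_read(fd, &val) == 0) {
		printf("woken, pending kicks: %llu\n",
		       (unsigned long long)val);
	}

	close(fd);
	return 0;
}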