diff --git a/include/spdk_internal/rdma.h b/include/spdk_internal/rdma.h index 820771f02..fb34e5069 100644 --- a/include/spdk_internal/rdma.h +++ b/include/spdk_internal/rdma.h @@ -48,9 +48,15 @@ struct spdk_rdma_qp_init_attr { bool initiator_side; }; +struct spdk_rdma_send_wr_list { + struct ibv_send_wr *first; + struct ibv_send_wr *last; +}; + struct spdk_rdma_qp { struct ibv_qp *qp; struct rdma_cm_id *cm_id; + struct spdk_rdma_send_wr_list send_wrs; }; /** @@ -83,4 +89,22 @@ void spdk_rdma_qp_destroy(struct spdk_rdma_qp *spdk_rdma_qp); */ int spdk_rdma_qp_disconnect(struct spdk_rdma_qp *spdk_rdma_qp); +/** + * Append the given send wr structure to the qpair's outstanding sends list. + * This function accepts either a single Work Request or the first WR in a linked list. + * + * \param spdk_rdma_qp Pointer to SPDK RDMA qpair + * \param first Pointer to the first Work Request + * \return true if there were no outstanding WRs before, false otherwise + */ +bool spdk_rdma_qp_queue_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr *first); + +/** + * Submit all queued Work Requests + * \param spdk_rdma_qp Pointer to SPDK RDMA qpair + * \param bad_wr Stores a pointer to the first failed WR if this function returns a nonzero value + * \return 0 on success, errno on failure + */ +int spdk_rdma_qp_flush_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr **bad_wr); + #endif /* SPDK_RDMA_H */ diff --git a/lib/rdma/rdma_mlx5_dv.c b/lib/rdma/rdma_mlx5_dv.c index da035adbe..2002354c2 100644 --- a/lib/rdma/rdma_mlx5_dv.c +++ b/lib/rdma/rdma_mlx5_dv.c @@ -188,6 +188,10 @@ spdk_rdma_qp_destroy(struct spdk_rdma_qp *spdk_rdma_qp) mlx5_qp = SPDK_CONTAINEROF(spdk_rdma_qp, struct spdk_rdma_mlx5_dv_qp, common); + if (spdk_rdma_qp->send_wrs.first != NULL) { + SPDK_WARNLOG("Destroying qpair with queued Work Requests\n"); + } + if (mlx5_qp->common.qp) { rc = ibv_destroy_qp(mlx5_qp->common.qp); if (rc) { @@ -206,7 +210,7 @@ spdk_rdma_qp_disconnect(struct spdk_rdma_qp
*spdk_rdma_qp) assert(spdk_rdma_qp != NULL); if (spdk_rdma_qp->qp) { - struct ibv_qp_attr qp_attr = { .qp_state = IBV_QPS_ERR }; + struct ibv_qp_attr qp_attr = {.qp_state = IBV_QPS_ERR}; rc = ibv_modify_qp(spdk_rdma_qp->qp, &qp_attr, IBV_QP_STATE); if (rc) { @@ -224,3 +228,80 @@ spdk_rdma_qp_disconnect(struct spdk_rdma_qp *spdk_rdma_qp) return rc; } + +bool +spdk_rdma_qp_queue_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr *first) +{ + struct ibv_send_wr *tmp; + struct spdk_rdma_mlx5_dv_qp *mlx5_qp; + bool is_first; + + assert(spdk_rdma_qp); + assert(first); + + is_first = spdk_rdma_qp->send_wrs.first == NULL; + mlx5_qp = SPDK_CONTAINEROF(spdk_rdma_qp, struct spdk_rdma_mlx5_dv_qp, common); + + if (is_first) { + ibv_wr_start(mlx5_qp->qpex); + spdk_rdma_qp->send_wrs.first = first; + } else { + spdk_rdma_qp->send_wrs.last->next = first; + } + + for (tmp = first; tmp != NULL; tmp = tmp->next) { + mlx5_qp->qpex->wr_id = tmp->wr_id; + mlx5_qp->qpex->wr_flags = tmp->send_flags; + + switch (tmp->opcode) { + case IBV_WR_SEND: + ibv_wr_send(mlx5_qp->qpex); + break; + case IBV_WR_SEND_WITH_INV: + ibv_wr_send_inv(mlx5_qp->qpex, tmp->invalidate_rkey); + break; + case IBV_WR_RDMA_READ: + ibv_wr_rdma_read(mlx5_qp->qpex, tmp->wr.rdma.rkey, tmp->wr.rdma.remote_addr); + break; + case IBV_WR_RDMA_WRITE: + ibv_wr_rdma_write(mlx5_qp->qpex, tmp->wr.rdma.rkey, tmp->wr.rdma.remote_addr); + break; + default: + SPDK_ERRLOG("Unexpected opcode %d\n", tmp->opcode); + assert(0); + } + + ibv_wr_set_sge_list(mlx5_qp->qpex, tmp->num_sge, tmp->sg_list); + + spdk_rdma_qp->send_wrs.last = tmp; + } + + return is_first; +} + +int +spdk_rdma_qp_flush_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr **bad_wr) +{ + struct spdk_rdma_mlx5_dv_qp *mlx5_qp; + int rc; + + assert(bad_wr); + assert(spdk_rdma_qp); + + mlx5_qp = SPDK_CONTAINEROF(spdk_rdma_qp, struct spdk_rdma_mlx5_dv_qp, common); + + if (spdk_unlikely(spdk_rdma_qp->send_wrs.first == NULL)) { + return 0; + } + + rc = 
ibv_wr_complete(mlx5_qp->qpex); + + if (spdk_unlikely(rc)) { + /* If ibv_wr_complete reports an error that means that no WRs are posted to NIC */ + *bad_wr = spdk_rdma_qp->send_wrs.first; + } + + spdk_rdma_qp->send_wrs.first = NULL; + + return rc; +} diff --git a/lib/rdma/rdma_verbs.c b/lib/rdma/rdma_verbs.c index 123d2f8f1..e2896ea60 100644 --- a/lib/rdma/rdma_verbs.c +++ b/lib/rdma/rdma_verbs.c @@ -86,6 +86,10 @@ spdk_rdma_qp_destroy(struct spdk_rdma_qp *spdk_rdma_qp) { assert(spdk_rdma_qp != NULL); + if (spdk_rdma_qp->send_wrs.first != NULL) { + SPDK_WARNLOG("Destroying qpair with queued Work Requests\n"); + } + if (spdk_rdma_qp->qp) { rdma_destroy_qp(spdk_rdma_qp->cm_id); } @@ -109,3 +113,46 @@ spdk_rdma_qp_disconnect(struct spdk_rdma_qp *spdk_rdma_qp) return rc; } + +bool +spdk_rdma_qp_queue_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr *first) +{ + struct ibv_send_wr *last; + + assert(spdk_rdma_qp); + assert(first); + + last = first; + while (last->next != NULL) { + last = last->next; + } + + if (spdk_rdma_qp->send_wrs.first == NULL) { + spdk_rdma_qp->send_wrs.first = first; + spdk_rdma_qp->send_wrs.last = last; + return true; + } else { + spdk_rdma_qp->send_wrs.last->next = first; + spdk_rdma_qp->send_wrs.last = last; + return false; + } +} + +int +spdk_rdma_qp_flush_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr **bad_wr) +{ + int rc; + + assert(spdk_rdma_qp); + assert(bad_wr); + + if (spdk_unlikely(!spdk_rdma_qp->send_wrs.first)) { + return 0; + } + + rc = ibv_post_send(spdk_rdma_qp->qp, spdk_rdma_qp->send_wrs.first, bad_wr); + + spdk_rdma_qp->send_wrs.first = NULL; + + return rc; +} diff --git a/lib/rdma/spdk_rdma.map b/lib/rdma/spdk_rdma.map index 96fbe3294..f20e47a7a 100644 --- a/lib/rdma/spdk_rdma.map +++ b/lib/rdma/spdk_rdma.map @@ -6,6 +6,8 @@ spdk_rdma_qp_complete_connect; spdk_rdma_qp_destroy; spdk_rdma_qp_disconnect; + spdk_rdma_qp_queue_send_wrs; + spdk_rdma_qp_flush_send_wrs; local: *; };