vhost-blk: resubmit inflight descs of packed ring

This patch adds live recovery support for the packed ring.
After a reconnection, the inflight descriptors must be resubmitted.

Change-Id: I133bf5f1c09029d3c693c0fef67a609d72f2bf69
Signed-off-by: Jin Yu <jin.yu@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/4127
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Community-CI: Mellanox Build Bot
Author: Jin Yu, 2020-08-18 22:30:31 +08:00 (committed by Tomasz Zawadzki)
parent ec2e6e2b91
commit 4ad4c76cba
3 changed files with 245 additions and 23 deletions
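In rough outline, the recovery path added here works as follows: after the vhost socket reconnects, each virtqueue's resubmit list (rebuilt from the shared inflight region) is drained before any new descriptors are polled, and every recorded request index is replayed through either the packed-ring or the split-ring path. The condensed sketch below mirrors the submit_inflight_desc() hunk further down; the resubmit_inflight, resubmit_num and index field names are assumed from DPDK's rte_vhost inflight API rather than taken verbatim from this patch.

static void
resubmit_inflight_sketch(struct spdk_vhost_virtqueue *vq)
{
	/* Assumed field names: resubmit_inflight, resubmit_num, index. */
	spdk_vhost_resubmit_info *resubmit = vq->vring_inflight.resubmit_inflight;
	uint16_t req_idx;

	if (resubmit == NULL || resubmit->resubmit_list == NULL) {
		return;
	}

	while (resubmit->resubmit_num > 0) {
		resubmit->resubmit_num--;
		req_idx = resubmit->resubmit_list[resubmit->resubmit_num].index;

		if (vq->packed.packed_ring) {
			/* Rebuild the task from the inflight desc array. */
			process_packed_inflight_blk_task(vq, req_idx);
		} else {
			/* Split ring: replay the original avail entry. */
			process_blk_task(vq, req_idx);
		}
	}

	free(resubmit->resubmit_list);
	resubmit->resubmit_list = NULL;
}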


@@ -251,6 +251,12 @@ vhost_vring_packed_desc_is_indirect(struct vring_packed_desc *cur_desc)
return (cur_desc->flags & VRING_DESC_F_INDIRECT) != 0;
}
static bool
vhost_inflight_packed_desc_is_indirect(spdk_vhost_inflight_desc *cur_desc)
{
return (cur_desc->flags & VRING_DESC_F_INDIRECT) != 0;
}
int
vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table,
@@ -280,6 +286,22 @@ vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtque
return 0;
}
static bool
vhost_packed_desc_indirect_to_desc_table(struct spdk_vhost_session *vsession,
uint64_t addr, uint32_t len,
struct vring_packed_desc **desc_table,
uint32_t *desc_table_size)
{
*desc_table_size = len / sizeof(struct vring_packed_desc);
*desc_table = vhost_gpa_to_vva(vsession, addr, len);
if (spdk_unlikely(*desc_table == NULL)) {
return false;
}
return true;
}
int
vhost_vq_get_desc_packed(struct spdk_vhost_session *vsession,
struct spdk_vhost_virtqueue *virtqueue,
@@ -294,13 +316,12 @@ vhost_vq_get_desc_packed(struct spdk_vhost_session *vsession,
* different from split ring.
*/
if (vhost_vring_packed_desc_is_indirect(*desc)) {
*desc_table_size = (*desc)->len / sizeof(struct vring_packed_desc);
*desc_table = vhost_gpa_to_vva(vsession, (*desc)->addr,
(*desc)->len);
*desc = *desc_table;
if (spdk_unlikely(*desc == NULL)) {
if (!vhost_packed_desc_indirect_to_desc_table(vsession, (*desc)->addr, (*desc)->len,
desc_table, desc_table_size)) {
return -1;
}
*desc = *desc_table;
} else {
*desc_table = NULL;
*desc_table_size = 0;
@@ -309,6 +330,34 @@ vhost_vq_get_desc_packed(struct spdk_vhost_session *vsession,
return 0;
}
int
vhost_inflight_queue_get_desc(struct spdk_vhost_session *vsession,
spdk_vhost_inflight_desc *desc_array,
uint16_t req_idx, spdk_vhost_inflight_desc **desc,
struct vring_packed_desc **desc_table, uint32_t *desc_table_size)
{
*desc = &desc_array[req_idx];
if (vhost_inflight_packed_desc_is_indirect(*desc)) {
if (!vhost_packed_desc_indirect_to_desc_table(vsession, (*desc)->addr, (*desc)->len,
desc_table, desc_table_size)) {
return -1;
}
/* This desc is the inflight desc, not the packed desc.
* When F_INDIRECT is set, the desc table entries are packed descs,
* so set the inflight desc to NULL.
*/
*desc = NULL;
} else {
/* When F_INDIRECT is not set, there is no packed desc table */
*desc_table = NULL;
*desc_table_size = 0;
}
return 0;
}
int
vhost_vq_used_signal(struct spdk_vhost_session *vsession,
struct spdk_vhost_virtqueue *virtqueue)
@@ -624,6 +673,12 @@ vhost_vring_packed_desc_is_wr(struct vring_packed_desc *cur_desc)
return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
}
bool
vhost_vring_inflight_desc_is_wr(spdk_vhost_inflight_desc *cur_desc)
{
return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
}
int
vhost_vring_packed_desc_get_next(struct vring_packed_desc **desc, uint16_t *req_idx,
struct spdk_vhost_virtqueue *vq,
@@ -695,6 +750,14 @@ vhost_vring_packed_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec
desc->addr, desc->len);
}
int
vhost_vring_inflight_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
uint16_t *iov_index, const spdk_vhost_inflight_desc *desc)
{
return vhost_vring_desc_payload_to_iov(vsession, iov, iov_index,
desc->addr, desc->len);
}
/* 1, Traverse the desc chain to get the buffer_id and return buffer_id as task_idx.
* 2, Update the vq->last_avail_idx to point next available desc chain.
* 3, Update the avail_wrap_counter if last_avail_idx overturn.
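The three numbered steps above describe vhost_vring_packed_desc_get_buffer_id(), whose body lies outside this hunk. A rough, illustrative sketch of those steps (not the verbatim SPDK implementation, and ignoring indirect descriptors) could look like this:

static uint16_t
packed_desc_get_buffer_id_sketch(struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
				 uint16_t *num_descs)
{
	struct vring_packed_desc *desc = &vq->vring.desc_packed[req_idx];

	/* 1. Walk the chain; the id field of the last desc is the buffer_id. */
	*num_descs = 1;
	while ((desc->flags & VRING_DESC_F_NEXT) != 0) {
		req_idx = (req_idx + 1) % vq->vring.size;
		desc = &vq->vring.desc_packed[req_idx];
		(*num_descs)++;
	}

	/* 2. Advance last_avail_idx past the chain just consumed. */
	vq->last_avail_idx += *num_descs;

	/* 3. Flip the avail wrap counter whenever the index wraps around. */
	if (vq->last_avail_idx >= vq->vring.size) {
		vq->last_avail_idx -= vq->vring.size;
		vq->packed.avail_phase = !vq->packed.avail_phase;
	}

	return desc->id;
}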


@@ -246,26 +246,20 @@ blk_iovs_split_queue_setup(struct spdk_vhost_blk_session *bvsession,
}
static int
blk_iovs_packed_queue_setup(struct spdk_vhost_blk_session *bvsession,
struct spdk_vhost_virtqueue *vq,
uint16_t req_idx, struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
blk_iovs_packed_desc_setup(struct spdk_vhost_session *vsession,
struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
struct vring_packed_desc *desc_table, uint16_t desc_table_size,
struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
{
struct spdk_vhost_session *vsession = &bvsession->vsession;
struct spdk_vhost_dev *vdev = vsession->vdev;
struct vring_packed_desc *desc = NULL, *desc_table;
uint16_t out_cnt = 0, cnt = 0;
uint32_t desc_table_size, len = 0;
int rc = 0;
struct vring_packed_desc *desc;
uint16_t cnt = 0, out_cnt = 0;
uint32_t len = 0;
rc = vhost_vq_get_desc_packed(vsession, vq, req_idx, &desc,
&desc_table, &desc_table_size);
if (spdk_unlikely(rc != 0)) {
SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx);
return rc;
}
if (desc_table != NULL) {
if (desc_table == NULL) {
desc = &vq->vring.desc_packed[req_idx];
} else {
req_idx = 0;
desc = desc_table;
}
while (1) {
@@ -310,6 +304,96 @@ blk_iovs_packed_queue_setup(struct spdk_vhost_blk_session *bvsession,
return 0;
}
static int
blk_iovs_packed_queue_setup(struct spdk_vhost_blk_session *bvsession,
struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
{
struct spdk_vhost_session *vsession = &bvsession->vsession;
struct spdk_vhost_dev *vdev = vsession->vdev;
struct vring_packed_desc *desc = NULL, *desc_table;
uint32_t desc_table_size;
int rc;
rc = vhost_vq_get_desc_packed(vsession, vq, req_idx, &desc,
&desc_table, &desc_table_size);
if (spdk_unlikely(rc != 0)) {
SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx);
return rc;
}
return blk_iovs_packed_desc_setup(vsession, vq, req_idx, desc_table, desc_table_size,
iovs, iovs_cnt, length);
}
static int
blk_iovs_inflight_queue_setup(struct spdk_vhost_blk_session *bvsession,
struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
{
struct spdk_vhost_session *vsession = &bvsession->vsession;
struct spdk_vhost_dev *vdev = vsession->vdev;
spdk_vhost_inflight_desc *inflight_desc;
struct vring_packed_desc *desc_table;
uint16_t out_cnt = 0, cnt = 0;
uint32_t desc_table_size, len = 0;
int rc = 0;
rc = vhost_inflight_queue_get_desc(vsession, vq->vring_inflight.inflight_packed->desc,
req_idx, &inflight_desc, &desc_table, &desc_table_size);
if (spdk_unlikely(rc != 0)) {
SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx);
return rc;
}
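/* An indirect inflight desc points at a regular packed desc table,
 * so the chain can be walked with the same helper used for the live ring.
 */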
if (desc_table != NULL) {
return blk_iovs_packed_desc_setup(vsession, vq, req_idx, desc_table, desc_table_size,
iovs, iovs_cnt, length);
}
while (1) {
/*
* Maximum cnt reached?
* Should not happen if the request is well formed; otherwise this is a BUG.
*/
if (spdk_unlikely(cnt == *iovs_cnt)) {
SPDK_ERRLOG("%s: max IOVs in request reached (req_idx = %"PRIu16").\n",
vsession->name, req_idx);
return -EINVAL;
}
if (spdk_unlikely(vhost_vring_inflight_desc_to_iov(vsession, iovs, &cnt, inflight_desc))) {
SPDK_ERRLOG("%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n",
vsession->name, req_idx, cnt);
return -EINVAL;
}
len += inflight_desc->len;
out_cnt += vhost_vring_inflight_desc_is_wr(inflight_desc);
/* A desc without F_NEXT is the last one in the chain */
if ((inflight_desc->flags & VRING_DESC_F_NEXT) == 0) {
break;
}
inflight_desc = &vq->vring_inflight.inflight_packed->desc[inflight_desc->next];
}
/*
* There must be at least two descriptors.
* The first contains the request, so it must be readable.
* The last contains the buffer for the response, so it must be writable.
*/
if (spdk_unlikely(out_cnt == 0 || cnt < 2)) {
return -EINVAL;
}
*length = len;
*iovs_cnt = cnt;
return 0;
}
static void
blk_request_finish(bool success, struct spdk_vhost_blk_task *task)
{
@@ -639,6 +723,64 @@ process_packed_blk_task(struct spdk_vhost_virtqueue *vq, uint16_t req_idx)
}
}
static void
process_packed_inflight_blk_task(struct spdk_vhost_virtqueue *vq,
uint16_t req_idx)
{
spdk_vhost_inflight_desc *desc_array = vq->vring_inflight.inflight_packed->desc;
spdk_vhost_inflight_desc *desc = &desc_array[req_idx];
struct spdk_vhost_blk_task *task;
uint16_t task_idx, num_descs;
int rc;
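/* desc->last indexes the final desc of the chain; its id field holds the
 * buffer_id, which doubles as the task index. desc->num is the number of
 * descs the request consumed when it was originally submitted.
 */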
task_idx = desc_array[desc->last].id;
num_descs = desc->num;
/* On packed ring reconnection, last_avail_idx starts from the saved
 * last_used_idx, so the available ring index still has to be advanced
 * while the inflight descs are processed.
 */
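/* Example with made-up numbers: with a vring.size of 256, a last_avail_idx
 * of 250 and an 8-desc chain, the index advances to 258, wraps to 2, and
 * avail_phase is inverted.
 */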
vq->last_avail_idx += num_descs;
if (vq->last_avail_idx >= vq->vring.size) {
vq->last_avail_idx -= vq->vring.size;
vq->packed.avail_phase = !vq->packed.avail_phase;
}
task = &((struct spdk_vhost_blk_task *)vq->tasks)[task_idx];
if (spdk_unlikely(task->used)) {
SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
task->bvsession->vsession.name, task_idx);
task->used_len = 0;
blk_task_enqueue(task);
return;
}
task->req_idx = req_idx;
task->num_descs = num_descs;
task->buffer_id = task_idx;
/* Record the head so the inflight entries can be cleared later */
task->inflight_head = req_idx;
task->bvsession->vsession.task_cnt++;
blk_task_init(task);
rc = blk_iovs_inflight_queue_setup(task->bvsession, vq, task->req_idx, task->iovs, &task->iovcnt,
&task->payload_size);
if (rc) {
SPDK_DEBUGLOG(vhost_blk, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx);
/* Only READ and WRITE are supported for now. */
invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP);
return;
}
if (process_blk_request(task, task->bvsession) == 0) {
SPDK_DEBUGLOG(vhost_blk, "====== Task %p req_idx %d submitted ======\n", task,
task_idx);
} else {
SPDK_ERRLOG("====== Task %p req_idx %d failed ======\n", task, task_idx);
}
}
static void
submit_inflight_desc(struct spdk_vhost_blk_session *bvsession,
struct spdk_vhost_virtqueue *vq)
@@ -665,8 +807,12 @@ submit_inflight_desc(struct spdk_vhost_blk_session *bvsession,
continue;
}
if (vq->packed.packed_ring) {
process_packed_inflight_blk_task(vq, req_idx);
} else {
process_blk_task(vq, req_idx);
}
}
free(resubmit_list);
resubmit->resubmit_list = NULL;
@@ -708,6 +854,8 @@ process_packed_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_vi
{
uint16_t i = 0;
submit_inflight_desc(bvsession, vq);
while (i++ < SPDK_VHOST_VQ_MAX_SUBMISSIONS &&
vhost_vq_packed_ring_is_avail(vq)) {
SPDK_DEBUGLOG(vhost_blk, "====== Starting processing request idx %"PRIu16"======\n",


@@ -86,6 +86,7 @@
typedef struct rte_vhost_resubmit_desc spdk_vhost_resubmit_desc;
typedef struct rte_vhost_resubmit_info spdk_vhost_resubmit_info;
typedef struct rte_vhost_inflight_desc_packed spdk_vhost_inflight_desc;
struct spdk_vhost_virtqueue {
struct rte_vhost_vring vring;
@@ -287,6 +288,11 @@ vhost_vq_get_desc_packed(struct spdk_vhost_session *vsession,
uint16_t req_idx, struct vring_packed_desc **desc,
struct vring_packed_desc **desc_table, uint32_t *desc_table_size);
int vhost_inflight_queue_get_desc(struct spdk_vhost_session *vsession,
spdk_vhost_inflight_desc *desc_array,
uint16_t req_idx, spdk_vhost_inflight_desc **desc,
struct vring_packed_desc **desc_table, uint32_t *desc_table_size);
/**
* Send IRQ/call client (if pending) for \c vq.
* \param vsession vhost session
@@ -379,6 +385,11 @@ bool vhost_vring_packed_desc_is_wr(struct vring_packed_desc *cur_desc);
int vhost_vring_packed_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
uint16_t *iov_index, const struct vring_packed_desc *desc);
bool vhost_vring_inflight_desc_is_wr(spdk_vhost_inflight_desc *cur_desc);
int vhost_vring_inflight_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
uint16_t *iov_index, const spdk_vhost_inflight_desc *desc);
uint16_t vhost_vring_packed_desc_get_buffer_id(struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
uint16_t *num_descs);