Previously we polled the MMIO callbacks in the context of the admin queue's poll group. This change starts a dedicated poller for MMIO polling, so the poll group only processes NVMe commands while the MMIO poller handles MMIO accesses. This is useful for live migration: the migration region defined by VFIO is a BAR region, so during live migration we must stop polling the queue pairs but still acknowledge MMIO accesses.

Change-Id: I63bac44889cbe0c31d47599810aab8335dfd4ff5
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/7251
Community-CI: Broadcom CI
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: <dongx.yi@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
/*-
 *   BSD LICENSE
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2019, Nutanix Inc. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NVMe over vfio-user transport
 */

#include <vfio-user/libvfio-user.h>
#include <vfio-user/pci_defs.h>

#include "spdk/barrier.h"
#include "spdk/stdinc.h"
#include "spdk/assert.h"
#include "spdk/thread.h"
#include "spdk/nvmf_transport.h"
#include "spdk/sock.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/log.h"

#include "transport.h"

#include "nvmf_internal.h"

#define NVMF_VFIO_USER_DEFAULT_MAX_QUEUE_DEPTH 256
#define NVMF_VFIO_USER_DEFAULT_AQ_DEPTH 32
#define NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR 64
#define NVMF_VFIO_USER_DEFAULT_IN_CAPSULE_DATA_SIZE 0
#define NVMF_VFIO_USER_DEFAULT_MAX_IO_SIZE 131072
#define NVMF_VFIO_USER_DEFAULT_IO_UNIT_SIZE 131072
#define NVMF_VFIO_USER_DEFAULT_NUM_SHARED_BUFFERS 512 /* internal buf size */
#define NVMF_VFIO_USER_DEFAULT_BUFFER_CACHE_SIZE 0

#define NVMF_VFIO_USER_DOORBELLS_OFFSET 0x1000
#define NVMF_VFIO_USER_DOORBELLS_SIZE 0x1000

#define NVME_REG_CFG_SIZE 0x1000
#define NVME_REG_BAR0_SIZE 0x4000
#define NVME_IRQ_INTX_NUM 1
#define NVME_IRQ_MSIX_NUM NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR

struct nvmf_vfio_user_req;
struct nvmf_vfio_user_qpair;

typedef int (*nvmf_vfio_user_req_cb_fn)(struct nvmf_vfio_user_req *req, void *cb_arg);

#define NVMF_VFIO_USER_MDTS 32
#define NVMF_VFIO_USER_MAX_IOVECS (NVMF_VFIO_USER_MDTS + 1)

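/*
 * Editorial note (an illustration, not part of the original source): BAR0 is
 * laid out as the NVMe register block in its first 0x1000 bytes followed by
 * the doorbell array in the next 0x1000 bytes
 * (NVMF_VFIO_USER_DOORBELLS_OFFSET/SIZE above). Because CAP.DSTRD is 0 for
 * NVMf, each doorbell is a single 32-bit word and the SQ/CQ doorbells of a
 * queue pair are adjacent:
 *
 *	// a sketch of the doorbell offset arithmetic used below
 *	static inline size_t sq_tdbl_offset(uint16_t qid) {
 *		return 0x1000 + (2 * qid) * sizeof(uint32_t);
 *	}
 *	static inline size_t cq_hdbl_offset(uint16_t qid) {
 *		return 0x1000 + (2 * qid + 1) * sizeof(uint32_t);
 *	}
 *
 * With 64 queue pairs this needs 64 * 2 * 4 = 512 bytes, well within the
 * 0x1000-byte doorbell window.
 */
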
struct nvmf_vfio_user_req {
|
|
struct spdk_nvmf_request req;
|
|
struct spdk_nvme_cpl rsp;
|
|
struct spdk_nvme_cmd cmd;
|
|
uint16_t cid;
|
|
|
|
nvmf_vfio_user_req_cb_fn cb_fn;
|
|
void *cb_arg;
|
|
|
|
dma_sg_t sg[NVMF_VFIO_USER_MAX_IOVECS];
|
|
struct iovec iov[NVMF_VFIO_USER_MAX_IOVECS];
|
|
uint8_t iovcnt;
|
|
|
|
TAILQ_ENTRY(nvmf_vfio_user_req) link;
|
|
};
|
|
|
|
/*
|
|
* A NVMe queue.
|
|
*/
|
|
struct nvme_q {
|
|
bool is_cq;
|
|
|
|
void *addr;
|
|
|
|
dma_sg_t sg;
|
|
struct iovec iov;
|
|
|
|
uint32_t size;
|
|
uint64_t prp1;
|
|
|
|
union {
|
|
struct {
|
|
uint32_t head;
|
|
/* multiple SQs can be mapped to the same CQ */
|
|
uint16_t cqid;
|
|
};
|
|
struct {
|
|
uint32_t tail;
|
|
uint16_t iv;
|
|
bool ien;
|
|
};
|
|
};
|
|
};
|
|
|
|
enum nvmf_vfio_user_qpair_state {
|
|
VFIO_USER_QPAIR_UNINITIALIZED = 0,
|
|
VFIO_USER_QPAIR_ACTIVE,
|
|
VFIO_USER_QPAIR_DELETED,
|
|
VFIO_USER_QPAIR_INACTIVE,
|
|
VFIO_USER_QPAIR_ERROR,
|
|
};
|
|
|
|
struct nvmf_vfio_user_qpair {
|
|
struct spdk_nvmf_qpair qpair;
|
|
struct spdk_nvmf_transport_poll_group *group;
|
|
struct nvmf_vfio_user_ctrlr *ctrlr;
|
|
struct nvmf_vfio_user_req *reqs_internal;
|
|
uint16_t qsize;
|
|
struct nvme_q cq;
|
|
struct nvme_q sq;
|
|
enum nvmf_vfio_user_qpair_state state;
|
|
|
|
TAILQ_HEAD(, nvmf_vfio_user_req) reqs;
|
|
TAILQ_ENTRY(nvmf_vfio_user_qpair) link;
|
|
};
|
|
|
|
struct nvmf_vfio_user_poll_group {
|
|
struct spdk_nvmf_transport_poll_group group;
|
|
TAILQ_HEAD(, nvmf_vfio_user_qpair) qps;
|
|
};
|
|
|
|
struct nvmf_vfio_user_ctrlr {
|
|
struct nvmf_vfio_user_endpoint *endpoint;
|
|
struct nvmf_vfio_user_transport *transport;
|
|
|
|
/* True when the socket connection is active */
|
|
bool ready;
|
|
/* Number of connected queue pairs */
|
|
uint32_t num_connected_qps;
|
|
|
|
struct spdk_thread *thread;
|
|
struct spdk_poller *mmio_poller;
|
|
|
|
uint16_t cntlid;
|
|
|
|
struct nvmf_vfio_user_qpair *qp[NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR];
|
|
|
|
TAILQ_ENTRY(nvmf_vfio_user_ctrlr) link;
|
|
|
|
volatile uint32_t *doorbells;
|
|
|
|
/* internal CSTS.CFS register for vfio-user fatal errors */
|
|
uint32_t cfs : 1;
|
|
};
|
|
|
|
struct nvmf_vfio_user_endpoint {
|
|
vfu_ctx_t *vfu_ctx;
|
|
struct msixcap *msix;
|
|
vfu_pci_config_space_t *pci_config_space;
|
|
int fd;
|
|
volatile uint32_t *doorbells;
|
|
|
|
struct spdk_nvme_transport_id trid;
|
|
const struct spdk_nvmf_subsystem *subsystem;
|
|
|
|
struct nvmf_vfio_user_ctrlr *ctrlr;
|
|
pthread_mutex_t lock;
|
|
|
|
TAILQ_ENTRY(nvmf_vfio_user_endpoint) link;
|
|
};
|
|
|
|
struct nvmf_vfio_user_transport {
|
|
struct spdk_nvmf_transport transport;
|
|
pthread_mutex_t lock;
|
|
TAILQ_HEAD(, nvmf_vfio_user_endpoint) endpoints;
|
|
|
|
TAILQ_HEAD(, nvmf_vfio_user_qpair) new_qps;
|
|
};
|
|
|
|
/*
|
|
* function prototypes
|
|
*/
|
|
static volatile uint32_t *
|
|
hdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q);
|
|
|
|
static volatile uint32_t *
|
|
tdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q);
|
|
|
|
static int
|
|
nvmf_vfio_user_req_free(struct spdk_nvmf_request *req);
|
|
|
|
static struct nvmf_vfio_user_req *
|
|
get_nvmf_vfio_user_req(struct nvmf_vfio_user_qpair *qpair);
|
|
|
|
static int
|
|
post_completion(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
|
|
struct nvme_q *cq, uint32_t cdw0, uint16_t sc,
|
|
uint16_t sct);
|
|
|
|
static char *
|
|
endpoint_id(struct nvmf_vfio_user_endpoint *endpoint)
|
|
{
|
|
return endpoint->trid.traddr;
|
|
}
|
|
|
|
static char *
|
|
ctrlr_id(struct nvmf_vfio_user_ctrlr *ctrlr)
|
|
{
|
|
if (!ctrlr || !ctrlr->endpoint) {
|
|
return "Null Ctrlr";
|
|
}
|
|
|
|
return endpoint_id(ctrlr->endpoint);
|
|
}
|
|
|
|
static uint16_t
|
|
io_q_id(struct nvme_q *q)
|
|
{
|
|
|
|
struct nvmf_vfio_user_qpair *vfio_user_qpair;
|
|
|
|
assert(q);
|
|
|
|
if (q->is_cq) {
|
|
vfio_user_qpair = SPDK_CONTAINEROF(q, struct nvmf_vfio_user_qpair, cq);
|
|
} else {
|
|
vfio_user_qpair = SPDK_CONTAINEROF(q, struct nvmf_vfio_user_qpair, sq);
|
|
}
|
|
assert(vfio_user_qpair);
|
|
return vfio_user_qpair->qpair.qid;
|
|
}
|
|
|
|
static void
|
|
fail_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr)
|
|
{
|
|
assert(ctrlr != NULL);
|
|
|
|
if (ctrlr->cfs == 0) {
|
|
SPDK_ERRLOG(":%s failing controller\n", ctrlr_id(ctrlr));
|
|
}
|
|
|
|
ctrlr->ready = false;
|
|
ctrlr->cfs = 1U;
|
|
}
|
|
|
|
static bool
|
|
ctrlr_interrupt_enabled(struct nvmf_vfio_user_ctrlr *ctrlr)
|
|
{
|
|
assert(ctrlr != NULL);
|
|
assert(ctrlr->endpoint != NULL);
|
|
|
|
vfu_pci_config_space_t *pci = ctrlr->endpoint->pci_config_space;
|
|
|
|
return (!pci->hdr.cmd.id || ctrlr->endpoint->msix->mxc.mxe);
|
|
}
|
|
|
|
static void
|
|
nvmf_vfio_user_destroy_endpoint(struct nvmf_vfio_user_endpoint *endpoint)
|
|
{
|
|
if (endpoint->doorbells) {
|
|
munmap((void *)endpoint->doorbells, NVMF_VFIO_USER_DOORBELLS_SIZE);
|
|
}
|
|
|
|
if (endpoint->fd > 0) {
|
|
close(endpoint->fd);
|
|
}
|
|
|
|
vfu_destroy_ctx(endpoint->vfu_ctx);
|
|
|
|
pthread_mutex_destroy(&endpoint->lock);
|
|
free(endpoint);
|
|
}
|
|
|
|
/* called when process exits */
|
|
static int
|
|
nvmf_vfio_user_destroy(struct spdk_nvmf_transport *transport,
|
|
spdk_nvmf_transport_destroy_done_cb cb_fn, void *cb_arg)
|
|
{
|
|
struct nvmf_vfio_user_transport *vu_transport;
|
|
struct nvmf_vfio_user_endpoint *endpoint, *tmp;
|
|
|
|
SPDK_DEBUGLOG(nvmf_vfio, "destroy transport\n");
|
|
|
|
vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport,
|
|
transport);
|
|
|
|
(void)pthread_mutex_destroy(&vu_transport->lock);
|
|
|
|
TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) {
|
|
TAILQ_REMOVE(&vu_transport->endpoints, endpoint, link);
|
|
nvmf_vfio_user_destroy_endpoint(endpoint);
|
|
}
|
|
|
|
free(vu_transport);
|
|
|
|
if (cb_fn) {
|
|
cb_fn(cb_arg);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static struct spdk_nvmf_transport *
|
|
nvmf_vfio_user_create(struct spdk_nvmf_transport_opts *opts)
|
|
{
|
|
struct nvmf_vfio_user_transport *vu_transport;
|
|
int err;
|
|
|
|
vu_transport = calloc(1, sizeof(*vu_transport));
|
|
if (vu_transport == NULL) {
|
|
SPDK_ERRLOG("Transport alloc fail: %m\n");
|
|
return NULL;
|
|
}
|
|
|
|
err = pthread_mutex_init(&vu_transport->lock, NULL);
|
|
if (err != 0) {
|
|
SPDK_ERRLOG("Pthread initialisation failed (%d)\n", err);
|
|
goto err;
|
|
}
|
|
|
|
TAILQ_INIT(&vu_transport->endpoints);
|
|
TAILQ_INIT(&vu_transport->new_qps);
|
|
|
|
return &vu_transport->transport;
|
|
|
|
err:
|
|
free(vu_transport);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static uint16_t
|
|
max_queue_size(struct nvmf_vfio_user_ctrlr const *ctrlr)
|
|
{
|
|
assert(ctrlr != NULL);
|
|
assert(ctrlr->qp[0] != NULL);
|
|
assert(ctrlr->qp[0]->qpair.ctrlr != NULL);
|
|
|
|
return ctrlr->qp[0]->qpair.ctrlr->vcprop.cap.bits.mqes + 1;
|
|
}
|
|
|
|
static void *
|
|
map_one(vfu_ctx_t *ctx, uint64_t addr, uint64_t len, dma_sg_t *sg, struct iovec *iov)
|
|
{
|
|
int ret;
|
|
|
|
assert(ctx != NULL);
|
|
assert(sg != NULL);
|
|
assert(iov != NULL);
|
|
|
|
ret = vfu_addr_to_sg(ctx, addr, len, sg, 1, PROT_READ | PROT_WRITE);
|
|
if (ret != 1) {
|
|
return NULL;
|
|
}
|
|
|
|
ret = vfu_map_sg(ctx, sg, iov, 1);
|
|
if (ret != 0) {
|
|
return NULL;
|
|
}
|
|
|
|
assert(iov->iov_base != NULL);
|
|
return iov->iov_base;
|
|
}
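
/*
 * Editorial note (an illustration, not part of the original source): map_one()
 * translates one guest physical address range into a host virtual address in
 * two steps: vfu_addr_to_sg() resolves the address into a DMA scatter-gather
 * entry, and vfu_map_sg() maps that entry into this process' address space.
 * The caller keeps the dma_sg_t/iovec pair so the mapping can later be
 * released with vfu_unmap_sg(), as unmap_qp() and handle_cmd_rsp() do below.
 * A hypothetical usage sketch:
 *
 *	dma_sg_t sg;
 *	struct iovec iov;
 *	void *va = map_one(vfu_ctx, guest_pa, len, &sg, &iov);
 *	if (va != NULL) {
 *		// ... access guest memory through va ...
 *		vfu_unmap_sg(vfu_ctx, &sg, &iov, 1);
 *	}
 */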
|
|
|
|
static uint32_t
|
|
sq_head(struct nvmf_vfio_user_qpair *qpair)
|
|
{
|
|
assert(qpair != NULL);
|
|
return qpair->sq.head;
|
|
}
|
|
|
|
static void
|
|
sqhd_advance(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvmf_vfio_user_qpair *qpair)
|
|
{
|
|
assert(ctrlr != NULL);
|
|
assert(qpair != NULL);
|
|
qpair->sq.head = (qpair->sq.head + 1) % qpair->sq.size;
|
|
}
|
|
|
|
static void
|
|
insert_queue(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q,
|
|
const bool is_cq, const uint16_t id)
|
|
{
|
|
struct nvme_q *_q;
|
|
struct nvmf_vfio_user_qpair *qpair;
|
|
|
|
assert(ctrlr != NULL);
|
|
assert(q != NULL);
|
|
|
|
qpair = ctrlr->qp[id];
|
|
|
|
q->is_cq = is_cq;
|
|
if (is_cq) {
|
|
_q = &qpair->cq;
|
|
*_q = *q;
|
|
*hdbl(ctrlr, _q) = 0;
|
|
} else {
|
|
_q = &qpair->sq;
|
|
*_q = *q;
|
|
*tdbl(ctrlr, _q) = 0;
|
|
}
|
|
}
|
|
|
|
static int
|
|
asq_map(struct nvmf_vfio_user_ctrlr *ctrlr)
|
|
{
|
|
struct nvme_q q = {};
|
|
const struct spdk_nvmf_registers *regs;
|
|
|
|
assert(ctrlr != NULL);
|
|
assert(ctrlr->qp[0] != NULL);
|
|
assert(ctrlr->qp[0]->sq.addr == NULL);
|
|
/* XXX ctrlr->asq == 0 is a valid memory address */
|
|
|
|
regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr);
|
|
q.size = regs->aqa.bits.asqs + 1;
|
|
q.head = ctrlr->doorbells[0] = 0;
|
|
q.cqid = 0;
|
|
q.addr = map_one(ctrlr->endpoint->vfu_ctx, regs->asq,
|
|
q.size * sizeof(struct spdk_nvme_cmd), &q.sg, &q.iov);
|
|
if (q.addr == NULL) {
|
|
return -1;
|
|
}
|
|
memset(q.addr, 0, q.size * sizeof(struct spdk_nvme_cmd));
|
|
insert_queue(ctrlr, &q, false, 0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static uint16_t
|
|
cq_next(struct nvme_q *q)
|
|
{
|
|
assert(q != NULL);
|
|
assert(q->is_cq);
|
|
return (q->tail + 1) % q->size;
|
|
}
|
|
|
|
static int
|
|
queue_index(uint16_t qid, int is_cq)
|
|
{
|
|
return (qid * 2) + is_cq;
|
|
}
|
|
|
|
static volatile uint32_t *
|
|
tdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q)
|
|
{
|
|
assert(ctrlr != NULL);
|
|
assert(q != NULL);
|
|
assert(!q->is_cq);
|
|
|
|
return &ctrlr->doorbells[queue_index(io_q_id(q), false)];
|
|
}
|
|
|
|
static volatile uint32_t *
|
|
hdbl(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q)
|
|
{
|
|
assert(ctrlr != NULL);
|
|
assert(q != NULL);
|
|
assert(q->is_cq);
|
|
|
|
return &ctrlr->doorbells[queue_index(io_q_id(q), true)];
|
|
}
|
|
|
|
static bool
|
|
cq_is_full(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvme_q *q)
|
|
{
|
|
assert(ctrlr != NULL);
|
|
assert(q != NULL);
|
|
return cq_next(q) == *hdbl(ctrlr, q);
|
|
}
|
|
|
|
static void
|
|
cq_tail_advance(struct nvme_q *q)
|
|
{
|
|
assert(q != NULL);
|
|
q->tail = cq_next(q);
|
|
}
|
|
|
|
static int
|
|
acq_map(struct nvmf_vfio_user_ctrlr *ctrlr)
|
|
{
|
|
struct nvme_q q = {};
|
|
const struct spdk_nvmf_registers *regs;
|
|
|
|
assert(ctrlr != NULL);
|
|
assert(ctrlr->qp[0] != NULL);
|
|
assert(ctrlr->qp[0]->cq.addr == NULL);
|
|
|
|
regs = spdk_nvmf_ctrlr_get_regs(ctrlr->qp[0]->qpair.ctrlr);
|
|
assert(regs != NULL);
|
|
|
|
q.size = regs->aqa.bits.acqs + 1;
|
|
q.tail = 0;
|
|
q.addr = map_one(ctrlr->endpoint->vfu_ctx, regs->acq,
|
|
q.size * sizeof(struct spdk_nvme_cpl), &q.sg, &q.iov);
|
|
if (q.addr == NULL) {
|
|
return -1;
|
|
}
|
|
memset(q.addr, 0, q.size * sizeof(struct spdk_nvme_cpl));
|
|
q.is_cq = true;
|
|
q.ien = true;
|
|
insert_queue(ctrlr, &q, true, 0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void *
|
|
_map_one(void *prv, uint64_t addr, uint64_t len)
|
|
{
|
|
struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)prv;
|
|
struct spdk_nvmf_qpair *qpair;
|
|
struct nvmf_vfio_user_req *vu_req;
|
|
struct nvmf_vfio_user_qpair *vu_qpair;
|
|
void *ret;
|
|
|
|
assert(req != NULL);
|
|
qpair = req->qpair;
|
|
vu_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
|
|
vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
|
|
|
|
assert(vu_req->iovcnt < NVMF_VFIO_USER_MAX_IOVECS);
|
|
ret = map_one(vu_qpair->ctrlr->endpoint->vfu_ctx, addr, len,
|
|
&vu_req->sg[vu_req->iovcnt],
|
|
&vu_req->iov[vu_req->iovcnt]);
|
|
if (spdk_likely(ret != NULL)) {
|
|
vu_req->iovcnt++;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
static int
|
|
vfio_user_map_prps(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req,
|
|
struct iovec *iov, uint32_t length)
|
|
{
|
|
/* Map the PRP list from guest physical memory to
 * host virtual memory addresses.
 */
|
|
return spdk_nvme_map_prps(req, &req->cmd->nvme_cmd, iov, length,
|
|
4096, _map_one);
|
|
}
|
|
|
|
static struct spdk_nvmf_request *
|
|
get_nvmf_req(struct nvmf_vfio_user_qpair *qp);
|
|
|
|
static int
|
|
handle_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
|
|
struct spdk_nvmf_request *req);
|
|
|
|
/*
|
|
* Posts a CQE in the completion queue.
|
|
*
|
|
* @ctrlr: the vfio-user controller
|
|
* @cmd: the NVMe command for which the completion is posted
|
|
* @cq: the completion queue
|
|
* @cdw0: cdw0 as reported by NVMf (only for SPDK_NVME_OPC_GET/SET_FEATURES)
|
|
* @sc: the NVMe CQE status code
|
|
* @sct: the NVMe CQE status code type
|
|
*/
|
|
static int
|
|
post_completion(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
|
|
struct nvme_q *cq, uint32_t cdw0, uint16_t sc,
|
|
uint16_t sct)
|
|
{
|
|
struct spdk_nvme_cpl *cpl;
|
|
uint16_t qid;
|
|
int err;
|
|
|
|
assert(ctrlr != NULL);
|
|
assert(cmd != NULL);
|
|
|
|
qid = io_q_id(cq);
|
|
|
|
if (ctrlr->qp[0]->qpair.ctrlr->vcprop.csts.bits.shst != SPDK_NVME_SHST_NORMAL) {
|
|
SPDK_DEBUGLOG(nvmf_vfio,
|
|
"%s: ignore completion SQ%d cid=%d status=%#x\n",
|
|
ctrlr_id(ctrlr), qid, cmd->cid, sc);
|
|
return 0;
|
|
}
|
|
|
|
if (cq_is_full(ctrlr, cq)) {
|
|
SPDK_ERRLOG("%s: CQ%d full (tail=%d, head=%d)\n",
|
|
ctrlr_id(ctrlr), qid, cq->tail, *hdbl(ctrlr, cq));
|
|
return -1;
|
|
}
|
|
|
|
cpl = ((struct spdk_nvme_cpl *)cq->addr) + cq->tail;
|
|
|
|
SPDK_DEBUGLOG(nvmf_vfio,
|
|
"%s: request complete SQ%d cid=%d status=%#x SQ head=%#x CQ tail=%#x\n",
|
|
ctrlr_id(ctrlr), qid, cmd->cid, sc, ctrlr->qp[qid]->sq.head,
|
|
cq->tail);
|
|
|
|
if (qid == 0) {
|
|
switch (cmd->opc) {
|
|
case SPDK_NVME_OPC_SET_FEATURES:
|
|
case SPDK_NVME_OPC_GET_FEATURES:
|
|
cpl->cdw0 = cdw0;
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
assert(ctrlr->qp[qid] != NULL);
|
|
|
|
cpl->sqhd = ctrlr->qp[qid]->sq.head;
|
|
cpl->cid = cmd->cid;
|
|
cpl->status.dnr = 0x0;
|
|
cpl->status.m = 0x0;
|
|
cpl->status.sct = sct;
|
|
cpl->status.p = ~cpl->status.p;
|
|
cpl->status.sc = sc;
|
|
|
|
cq_tail_advance(cq);
|
|
|
|
/*
|
|
* This function now executes in SPDK thread context, but we
* might be triggering interrupts from vfio-user thread context, so
* check for race conditions.
|
|
*/
|
|
if (ctrlr_interrupt_enabled(ctrlr) && cq->ien) {
|
|
err = vfu_irq_trigger(ctrlr->endpoint->vfu_ctx, cq->iv);
|
|
if (err != 0) {
|
|
SPDK_ERRLOG("%s: failed to trigger interrupt: %m\n",
|
|
ctrlr_id(ctrlr));
|
|
return err;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
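
/*
 * Editorial note (an illustration, not part of the original source): the CQE
 * phase bit toggled above (cpl->status.p = ~cpl->status.p) is what lets the
 * guest driver detect new completions: the CQ memory starts zeroed, and each
 * time the tail wraps the phase written by the controller flips, so an entry
 * whose phase matches the consumer's expected value is new. A minimal sketch
 * of the consumer side, assuming hypothetical cq[], head, cq_size and
 * expected_phase variables:
 *
 *	while (cq[head].status.p == expected_phase) {
 *		// ... process cq[head] ...
 *		if (++head == cq_size) {
 *			head = 0;
 *			expected_phase = !expected_phase;
 *		}
 *	}
 */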
|
|
|
|
static struct nvme_q *
|
|
lookup_io_q(struct nvmf_vfio_user_ctrlr *ctrlr, const uint16_t qid, const bool is_cq)
|
|
{
|
|
struct nvme_q *q;
|
|
|
|
assert(ctrlr != NULL);
|
|
|
|
if (qid > NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR) {
|
|
return NULL;
|
|
}
|
|
|
|
if (ctrlr->qp[qid] == NULL) {
|
|
return NULL;
|
|
}
|
|
|
|
if (is_cq) {
|
|
q = &ctrlr->qp[qid]->cq;
|
|
} else {
|
|
q = &ctrlr->qp[qid]->sq;
|
|
}
|
|
|
|
if (q->addr == NULL) {
|
|
return NULL;
|
|
}
|
|
|
|
return q;
|
|
}
|
|
|
|
static void
|
|
unmap_qp(struct nvmf_vfio_user_qpair *qp)
|
|
{
|
|
struct nvmf_vfio_user_ctrlr *ctrlr;
|
|
|
|
if (qp->ctrlr == NULL) {
|
|
return;
|
|
}
|
|
ctrlr = qp->ctrlr;
|
|
|
|
SPDK_DEBUGLOG(nvmf_vfio, "%s: destroy I/O QP%d\n",
|
|
ctrlr_id(ctrlr), qp->qpair.qid);
|
|
|
|
if (qp->sq.addr != NULL) {
|
|
vfu_unmap_sg(ctrlr->endpoint->vfu_ctx, &qp->sq.sg, &qp->sq.iov, 1);
|
|
qp->sq.addr = NULL;
|
|
}
|
|
|
|
if (qp->cq.addr != NULL) {
|
|
vfu_unmap_sg(ctrlr->endpoint->vfu_ctx, &qp->cq.sg, &qp->cq.iov, 1);
|
|
qp->cq.addr = NULL;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* TODO we can immediately remove the QP from the list because this function
|
|
* is now executed by the SPDK thread.
|
|
*/
|
|
static void
|
|
destroy_qp(struct nvmf_vfio_user_ctrlr *ctrlr, uint16_t qid)
|
|
{
|
|
struct nvmf_vfio_user_qpair *qpair;
|
|
|
|
if (ctrlr == NULL) {
|
|
return;
|
|
}
|
|
|
|
qpair = ctrlr->qp[qid];
|
|
if (qpair == NULL) {
|
|
return;
|
|
}
|
|
|
|
SPDK_DEBUGLOG(nvmf_vfio, "%s: destroy QP%d=%p\n", ctrlr_id(ctrlr),
|
|
qid, qpair);
|
|
|
|
unmap_qp(qpair);
|
|
free(qpair->reqs_internal);
|
|
free(qpair);
|
|
ctrlr->qp[qid] = NULL;
|
|
}
|
|
|
|
/* This function can only fail because of memory allocation errors. */
|
|
static int
|
|
init_qp(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_transport *transport,
|
|
const uint16_t qsize, const uint16_t id)
|
|
{
|
|
int err = 0, i;
|
|
struct nvmf_vfio_user_qpair *qpair;
|
|
struct nvmf_vfio_user_req *vu_req;
|
|
struct spdk_nvmf_request *req;
|
|
|
|
assert(ctrlr != NULL);
|
|
assert(transport != NULL);
|
|
|
|
qpair = calloc(1, sizeof(*qpair));
|
|
if (qpair == NULL) {
|
|
return -ENOMEM;
|
|
}
|
|
|
|
qpair->qpair.qid = id;
|
|
qpair->qpair.transport = transport;
|
|
qpair->ctrlr = ctrlr;
|
|
qpair->qsize = qsize;
|
|
|
|
TAILQ_INIT(&qpair->reqs);
|
|
|
|
qpair->reqs_internal = calloc(qsize, sizeof(struct nvmf_vfio_user_req));
|
|
if (qpair->reqs_internal == NULL) {
|
|
SPDK_ERRLOG("%s: error allocating reqs: %m\n", ctrlr_id(ctrlr));
|
|
err = -ENOMEM;
|
|
goto out;
|
|
}
|
|
|
|
for (i = 0; i < qsize; i++) {
|
|
vu_req = &qpair->reqs_internal[i];
|
|
req = &vu_req->req;
|
|
|
|
vu_req->cid = i;
|
|
req->qpair = &qpair->qpair;
|
|
req->rsp = (union nvmf_c2h_msg *)&vu_req->rsp;
|
|
req->cmd = (union nvmf_h2c_msg *)&vu_req->cmd;
|
|
|
|
TAILQ_INSERT_TAIL(&qpair->reqs, vu_req, link);
|
|
}
|
|
ctrlr->qp[id] = qpair;
|
|
out:
|
|
if (err != 0) {
|
|
free(qpair);
|
|
}
|
|
return err;
|
|
}
|
|
|
|
/*
|
|
* Creates a completion or submission I/O queue. Returns 0 on success, -errno
|
|
* on error.
|
|
*
|
|
* XXX SPDK thread context.
|
|
*/
|
|
static int
|
|
handle_create_io_q(struct nvmf_vfio_user_ctrlr *ctrlr,
|
|
struct spdk_nvme_cmd *cmd, const bool is_cq)
|
|
{
|
|
size_t entry_size;
|
|
uint16_t sc = SPDK_NVME_SC_SUCCESS;
|
|
uint16_t sct = SPDK_NVME_SCT_GENERIC;
|
|
int err = 0;
|
|
struct nvme_q io_q = {};
|
|
|
|
assert(ctrlr != NULL);
|
|
assert(cmd != NULL);
|
|
|
|
SPDK_DEBUGLOG(nvmf_vfio,
|
|
"%s: create I/O %cQ%d: QSIZE=%#x\n", ctrlr_id(ctrlr),
|
|
is_cq ? 'C' : 'S', cmd->cdw10_bits.create_io_q.qid,
|
|
cmd->cdw10_bits.create_io_q.qsize);
|
|
|
|
if (cmd->cdw10_bits.create_io_q.qid >= NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR) {
|
|
SPDK_ERRLOG("%s: invalid QID=%d, max=%d\n", ctrlr_id(ctrlr),
|
|
cmd->cdw10_bits.create_io_q.qid,
|
|
NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR);
|
|
sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
|
|
sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
|
|
goto out;
|
|
}
|
|
|
|
if (lookup_io_q(ctrlr, cmd->cdw10_bits.create_io_q.qid, is_cq)) {
|
|
SPDK_ERRLOG("%s: %cQ%d already exists\n", ctrlr_id(ctrlr),
|
|
is_cq ? 'C' : 'S', cmd->cdw10_bits.create_io_q.qid);
|
|
sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
|
|
sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
|
|
goto out;
|
|
}
|
|
|
|
/* TODO break rest of this function into smaller functions */
|
|
if (is_cq) {
|
|
entry_size = sizeof(struct spdk_nvme_cpl);
|
|
if (cmd->cdw11_bits.create_io_cq.pc != 0x1) {
|
|
/*
|
|
* TODO CAP.CMBS is currently set to zero, however we
|
|
* should zero it out explicitly when CAP is read.
|
|
* Support for CAP.CMBS is not mentioned in the NVMf
|
|
* spec.
|
|
*/
|
|
SPDK_ERRLOG("%s: non-PC CQ not supporred\n", ctrlr_id(ctrlr));
|
|
sc = SPDK_NVME_SC_INVALID_CONTROLLER_MEM_BUF;
|
|
goto out;
|
|
}
|
|
io_q.ien = cmd->cdw11_bits.create_io_cq.ien;
|
|
io_q.iv = cmd->cdw11_bits.create_io_cq.iv;
|
|
} else {
|
|
/* CQ must be created before SQ */
|
|
if (!lookup_io_q(ctrlr, cmd->cdw11_bits.create_io_sq.cqid, true)) {
|
|
SPDK_ERRLOG("%s: CQ%d does not exist\n", ctrlr_id(ctrlr),
|
|
cmd->cdw11_bits.create_io_sq.cqid);
|
|
sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
|
|
sc = SPDK_NVME_SC_COMPLETION_QUEUE_INVALID;
|
|
goto out;
|
|
}
|
|
|
|
entry_size = sizeof(struct spdk_nvme_cmd);
|
|
if (cmd->cdw11_bits.create_io_sq.pc != 0x1) {
|
|
SPDK_ERRLOG("%s: non-PC SQ not supported\n", ctrlr_id(ctrlr));
|
|
sc = SPDK_NVME_SC_INVALID_CONTROLLER_MEM_BUF;
|
|
goto out;
|
|
}
|
|
|
|
io_q.cqid = cmd->cdw11_bits.create_io_sq.cqid;
|
|
SPDK_DEBUGLOG(nvmf_vfio, "%s: SQ%d CQID=%d\n", ctrlr_id(ctrlr),
|
|
cmd->cdw10_bits.create_io_q.qid, io_q.cqid);
|
|
}
|
|
|
|
io_q.size = cmd->cdw10_bits.create_io_q.qsize + 1;
|
|
if (io_q.size > max_queue_size(ctrlr)) {
|
|
SPDK_ERRLOG("%s: queue too big, want=%d, max=%d\n", ctrlr_id(ctrlr),
|
|
io_q.size, max_queue_size(ctrlr));
|
|
sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
|
|
sc = SPDK_NVME_SC_INVALID_QUEUE_SIZE;
|
|
goto out;
|
|
}
|
|
|
|
io_q.addr = map_one(ctrlr->endpoint->vfu_ctx, cmd->dptr.prp.prp1,
|
|
io_q.size * entry_size, &io_q.sg, &io_q.iov);
|
|
if (io_q.addr == NULL) {
|
|
sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
|
|
SPDK_ERRLOG("%s: failed to map I/O queue: %m\n", ctrlr_id(ctrlr));
|
|
goto out;
|
|
}
|
|
io_q.prp1 = cmd->dptr.prp.prp1;
|
|
memset(io_q.addr, 0, io_q.size * entry_size);
|
|
|
|
SPDK_DEBUGLOG(nvmf_vfio, "%s: mapped %cQ%d IOVA=%#lx vaddr=%#llx\n",
|
|
ctrlr_id(ctrlr), is_cq ? 'C' : 'S',
|
|
cmd->cdw10_bits.create_io_q.qid, cmd->dptr.prp.prp1,
|
|
(unsigned long long)io_q.addr);
|
|
|
|
if (is_cq) {
|
|
err = init_qp(ctrlr, ctrlr->qp[0]->qpair.transport, io_q.size,
|
|
cmd->cdw10_bits.create_io_q.qid);
|
|
if (err != 0) {
|
|
sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
|
|
goto out;
|
|
}
|
|
} else {
|
|
/*
|
|
* After we return from the nvmf_vfio_user_poll_group_poll thread, once
* nvmf_vfio_user_accept executes it will pick up this QP and will eventually
* call nvmf_vfio_user_poll_group_add. The rest of the operation needed to
* complete the addition of the queue is continued in the completion
* callback.
|
|
*/
|
|
TAILQ_INSERT_TAIL(&ctrlr->transport->new_qps, ctrlr->qp[cmd->cdw10_bits.create_io_q.qid], link);
|
|
|
|
}
|
|
insert_queue(ctrlr, &io_q, is_cq, cmd->cdw10_bits.create_io_q.qid);
|
|
|
|
out:
|
|
return post_completion(ctrlr, cmd, &ctrlr->qp[0]->cq, 0, sc, sct);
|
|
}
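
/*
 * Editorial note (an illustration, not part of the original source): the
 * Create I/O CQ/SQ admin commands handled above carry their parameters in
 * CDW10/CDW11: CDW10 holds the queue identifier and the 0-based queue size,
 * while CDW11 holds PC (physically contiguous) plus IEN/IV for a CQ or the
 * CQID to attach to for an SQ. Expressed with the spdk_nvme_cmd layout used
 * above, a hypothetical command for a 128-entry CQ on MSI-X vector 1 would
 * look roughly like:
 *
 *	struct spdk_nvme_cmd cmd = { .opc = SPDK_NVME_OPC_CREATE_IO_CQ };
 *	cmd.dptr.prp.prp1 = cq_guest_pa;            // queue memory
 *	cmd.cdw10_bits.create_io_q.qid = 1;
 *	cmd.cdw10_bits.create_io_q.qsize = 128 - 1; // 0-based
 *	cmd.cdw11_bits.create_io_cq.pc = 1;
 *	cmd.cdw11_bits.create_io_cq.ien = 1;
 *	cmd.cdw11_bits.create_io_cq.iv = 1;
 */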
|
|
|
|
/*
|
|
* Deletes a completion or submission I/O queue.
|
|
*/
|
|
static int
|
|
handle_del_io_q(struct nvmf_vfio_user_ctrlr *ctrlr,
|
|
struct spdk_nvme_cmd *cmd, const bool is_cq)
|
|
{
|
|
uint16_t sct = SPDK_NVME_SCT_GENERIC;
|
|
uint16_t sc = SPDK_NVME_SC_SUCCESS;
|
|
|
|
SPDK_DEBUGLOG(nvmf_vfio, "%s: delete I/O %cQ: QID=%d\n",
|
|
ctrlr_id(ctrlr), is_cq ? 'C' : 'S',
|
|
cmd->cdw10_bits.delete_io_q.qid);
|
|
|
|
if (lookup_io_q(ctrlr, cmd->cdw10_bits.delete_io_q.qid, is_cq) == NULL) {
|
|
SPDK_ERRLOG("%s: %cQ%d does not exist\n", ctrlr_id(ctrlr),
|
|
is_cq ? 'C' : 'S', cmd->cdw10_bits.delete_io_q.qid);
|
|
sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
|
|
sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
|
|
goto out;
|
|
}
|
|
|
|
if (is_cq) {
|
|
/* SQ must have been deleted first */
|
|
if (ctrlr->qp[cmd->cdw10_bits.delete_io_q.qid]->state != VFIO_USER_QPAIR_DELETED) {
|
|
SPDK_ERRLOG("%s: the associated SQ must be deleted first\n", ctrlr_id(ctrlr));
|
|
sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
|
|
sc = SPDK_NVME_SC_INVALID_QUEUE_DELETION;
|
|
goto out;
|
|
}
|
|
} else {
|
|
/*
|
|
* This doesn't actually delete the I/O queue, we can't
|
|
* do that anyway because NVMf doesn't support it. We're merely
|
|
* telling the poll_group_poll function to skip checking this
|
|
* queue. The only workflow in which this works is when CC.EN is set to
|
|
* 0 and we're stopping the subsystem, so we know that the
|
|
* relevant callbacks to destroy the queues will be called.
|
|
*/
|
|
assert(ctrlr->qp[cmd->cdw10_bits.delete_io_q.qid]->state == VFIO_USER_QPAIR_ACTIVE);
|
|
ctrlr->qp[cmd->cdw10_bits.delete_io_q.qid]->state = VFIO_USER_QPAIR_DELETED;
|
|
}
|
|
|
|
out:
|
|
return post_completion(ctrlr, cmd, &ctrlr->qp[0]->cq, 0, sc, sct);
|
|
}
|
|
|
|
/*
|
|
* Returns 0 on success and -errno on error.
|
|
*
|
|
* XXX SPDK thread context
|
|
*/
|
|
static int
|
|
consume_admin_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd)
|
|
{
|
|
assert(ctrlr != NULL);
|
|
assert(cmd != NULL);
|
|
|
|
SPDK_DEBUGLOG(nvmf_vfio, "%s: handle admin req opc=%#x cid=%d\n",
|
|
ctrlr_id(ctrlr), cmd->opc, cmd->cid);
|
|
|
|
switch (cmd->opc) {
|
|
case SPDK_NVME_OPC_CREATE_IO_CQ:
|
|
case SPDK_NVME_OPC_CREATE_IO_SQ:
|
|
return handle_create_io_q(ctrlr, cmd,
|
|
cmd->opc == SPDK_NVME_OPC_CREATE_IO_CQ);
|
|
case SPDK_NVME_OPC_DELETE_IO_SQ:
|
|
case SPDK_NVME_OPC_DELETE_IO_CQ:
|
|
return handle_del_io_q(ctrlr, cmd,
|
|
cmd->opc == SPDK_NVME_OPC_DELETE_IO_CQ);
|
|
default:
|
|
return handle_cmd_req(ctrlr, cmd, get_nvmf_req(ctrlr->qp[0]));
|
|
}
|
|
}
|
|
|
|
static int
|
|
handle_cmd_rsp(struct nvmf_vfio_user_req *req, void *cb_arg)
|
|
{
|
|
struct nvmf_vfio_user_qpair *qpair = cb_arg;
|
|
|
|
assert(qpair != NULL);
|
|
assert(req != NULL);
|
|
|
|
vfu_unmap_sg(qpair->ctrlr->endpoint->vfu_ctx, req->sg, req->iov, req->iovcnt);
|
|
|
|
return post_completion(qpair->ctrlr, &req->req.cmd->nvme_cmd,
|
|
&qpair->ctrlr->qp[req->req.qpair->qid]->cq,
|
|
req->req.rsp->nvme_cpl.cdw0,
|
|
req->req.rsp->nvme_cpl.status.sc,
|
|
req->req.rsp->nvme_cpl.status.sct);
|
|
}
|
|
|
|
static int
|
|
consume_cmd(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvmf_vfio_user_qpair *qpair,
|
|
struct spdk_nvme_cmd *cmd)
|
|
{
|
|
assert(qpair != NULL);
|
|
if (nvmf_qpair_is_admin_queue(&qpair->qpair)) {
|
|
return consume_admin_cmd(ctrlr, cmd);
|
|
}
|
|
|
|
return handle_cmd_req(ctrlr, cmd, get_nvmf_req(qpair));
|
|
}
|
|
|
|
static ssize_t
|
|
handle_sq_tdbl_write(struct nvmf_vfio_user_ctrlr *ctrlr, const uint32_t new_tail,
|
|
struct nvmf_vfio_user_qpair *qpair)
|
|
{
|
|
struct spdk_nvme_cmd *queue;
|
|
|
|
assert(ctrlr != NULL);
|
|
assert(qpair != NULL);
|
|
|
|
queue = qpair->sq.addr;
|
|
while (sq_head(qpair) != new_tail) {
|
|
int err;
|
|
struct spdk_nvme_cmd *cmd = &queue[sq_head(qpair)];
|
|
|
|
/*
|
|
* SQHD must contain the new head pointer, so we must increase
|
|
* it before we generate a completion.
|
|
*/
|
|
sqhd_advance(ctrlr, qpair);
|
|
|
|
err = consume_cmd(ctrlr, qpair, cmd);
|
|
if (err != 0) {
|
|
return err;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
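
/*
 * Editorial note (an illustration, not part of the original source): the loop
 * above consumes every entry between the current SQ head and the tail value
 * the guest just wrote to the doorbell, with both indices wrapping at the
 * queue size. The number of commands picked up per doorbell write is:
 *
 *	// sketch of the ring arithmetic, assuming head and new_tail < sq_size
 *	uint32_t n = (new_tail >= head) ? (new_tail - head)
 *					: (sq_size - head + new_tail);
 *
 * SQHD is advanced before each command is consumed, so any completion posted
 * for it already reports the updated head.
 */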
|
|
|
|
static int
|
|
map_admin_queue(struct nvmf_vfio_user_ctrlr *ctrlr)
|
|
{
|
|
int err;
|
|
|
|
assert(ctrlr != NULL);
|
|
|
|
err = acq_map(ctrlr);
|
|
if (err != 0) {
|
|
return err;
|
|
}
|
|
|
|
err = asq_map(ctrlr);
|
|
if (err != 0) {
|
|
return err;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
unmap_admin_queue(struct nvmf_vfio_user_ctrlr *ctrlr)
|
|
{
|
|
assert(ctrlr->qp[0] != NULL);
|
|
|
|
unmap_qp(ctrlr->qp[0]);
|
|
}
|
|
|
|
static void
|
|
memory_region_add_cb(vfu_ctx_t *vfu_ctx, uint64_t iova, uint64_t len, uint32_t prot)
|
|
{
|
|
struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx);
|
|
struct nvmf_vfio_user_ctrlr *ctrlr;
|
|
struct nvmf_vfio_user_qpair *qpair;
|
|
int i, ret;
|
|
|
|
assert(endpoint != NULL);
|
|
if (endpoint->ctrlr == NULL) {
|
|
return;
|
|
}
|
|
ctrlr = endpoint->ctrlr;
|
|
|
|
SPDK_DEBUGLOG(nvmf_vfio, "%s: map IOVA %#lx-%#lx\n",
|
|
ctrlr_id(ctrlr), iova, iova + len);
|
|
|
|
for (i = 0; i < NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR; i++) {
|
|
qpair = ctrlr->qp[i];
|
|
if (qpair == NULL) {
|
|
continue;
|
|
}
|
|
|
|
if (qpair->state != VFIO_USER_QPAIR_INACTIVE) {
|
|
continue;
|
|
}
|
|
|
|
if (nvmf_qpair_is_admin_queue(&qpair->qpair)) {
|
|
ret = map_admin_queue(ctrlr);
|
|
if (ret) {
|
|
SPDK_DEBUGLOG(nvmf_vfio, "Memory isn't ready to remap Admin queue\n");
|
|
continue;
|
|
}
|
|
qpair->state = VFIO_USER_QPAIR_ACTIVE;
|
|
} else {
|
|
struct nvme_q *sq = &qpair->sq;
|
|
struct nvme_q *cq = &qpair->cq;
|
|
|
|
sq->addr = map_one(ctrlr->endpoint->vfu_ctx, sq->prp1, sq->size * 64, &sq->sg, &sq->iov);
|
|
if (!sq->addr) {
|
|
SPDK_DEBUGLOG(nvmf_vfio, "Memory isn't ready to remap SQID %d %#lx-%#lx\n",
|
|
i, sq->prp1, sq->prp1 + sq->size * 64);
|
|
continue;
|
|
}
|
|
cq->addr = map_one(ctrlr->endpoint->vfu_ctx, cq->prp1, cq->size * 16, &cq->sg, &cq->iov);
|
|
if (!cq->addr) {
|
|
SPDK_DEBUGLOG(nvmf_vfio, "Memory isn't ready to remap CQID %d %#lx-%#lx\n",
|
|
i, cq->prp1, cq->prp1 + cq->size * 16);
|
|
continue;
|
|
}
|
|
qpair->state = VFIO_USER_QPAIR_ACTIVE;
|
|
}
|
|
}
|
|
}
|
|
|
|
static int
|
|
memory_region_remove_cb(vfu_ctx_t *vfu_ctx, uint64_t iova, uint64_t len)
|
|
{
|
|
|
|
struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx);
|
|
struct nvmf_vfio_user_ctrlr *ctrlr;
|
|
struct nvmf_vfio_user_qpair *qpair;
|
|
int i;
|
|
|
|
assert(endpoint != NULL);
|
|
if (endpoint->ctrlr == NULL) {
|
|
return 0;
|
|
}
|
|
ctrlr = endpoint->ctrlr;
|
|
|
|
SPDK_DEBUGLOG(nvmf_vfio, "%s: unmap IOVA %#lx-%#lx\n",
|
|
ctrlr_id(ctrlr), iova, iova + len);
|
|
|
|
for (i = 0; i < NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR; i++) {
|
|
qpair = ctrlr->qp[i];
|
|
if (qpair == NULL) {
|
|
continue;
|
|
}
|
|
|
|
if ((qpair->cq.sg.dma_addr >= iova && qpair->cq.sg.dma_addr < iova + len) ||
|
|
(qpair->sq.sg.dma_addr >= iova && qpair->sq.sg.dma_addr < iova + len)) {
|
|
unmap_qp(qpair);
|
|
qpair->state = VFIO_USER_QPAIR_INACTIVE;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
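
/*
 * Editorial note (an illustration, not part of the original source): these
 * two callbacks keep the queue mappings in sync with the guest's memory
 * registrations. When a region backing a queue is unregistered, the queue's
 * SG entries are unmapped and the qpair is parked in VFIO_USER_QPAIR_INACTIVE;
 * when memory is registered again, the queues are remapped from their saved
 * PRP1 addresses and moved back to VFIO_USER_QPAIR_ACTIVE. The 64- and
 * 16-byte multipliers above are the SQ and CQ entry sizes, i.e.
 *
 *	// an SQ spans [prp1, prp1 + size * sizeof(struct spdk_nvme_cmd))
 *	// a CQ spans [prp1, prp1 + size * sizeof(struct spdk_nvme_cpl))
 */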
|
|
|
|
static int
|
|
nvmf_vfio_user_prop_req_rsp(struct nvmf_vfio_user_req *req, void *cb_arg)
|
|
{
|
|
struct nvmf_vfio_user_qpair *qpair = cb_arg;
|
|
int ret;
|
|
|
|
assert(qpair != NULL);
|
|
assert(req != NULL);
|
|
|
|
if (req->req.cmd->prop_get_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET) {
|
|
assert(qpair->ctrlr != NULL);
|
|
assert(req != NULL);
|
|
|
|
memcpy(req->req.data,
|
|
&req->req.rsp->prop_get_rsp.value.u64,
|
|
req->req.length);
|
|
} else {
|
|
assert(req->req.cmd->prop_set_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET);
|
|
assert(qpair->ctrlr != NULL);
|
|
|
|
if (req->req.cmd->prop_set_cmd.ofst == offsetof(struct spdk_nvme_registers, cc)) {
|
|
union spdk_nvme_cc_register *cc;
|
|
|
|
cc = (union spdk_nvme_cc_register *)&req->req.cmd->prop_set_cmd.value.u64;
|
|
|
|
if (cc->bits.en == 1 && cc->bits.shn == 0) {
|
|
SPDK_DEBUGLOG(nvmf_vfio,
|
|
"%s: MAP Admin queue\n",
|
|
ctrlr_id(qpair->ctrlr));
|
|
ret = map_admin_queue(qpair->ctrlr);
|
|
if (ret) {
|
|
SPDK_ERRLOG("%s: failed to map Admin queue\n", ctrlr_id(qpair->ctrlr));
|
|
return ret;
|
|
}
|
|
qpair->state = VFIO_USER_QPAIR_ACTIVE;
|
|
} else if ((cc->bits.en == 0 && cc->bits.shn == 0) ||
|
|
(cc->bits.en == 1 && cc->bits.shn != 0)) {
|
|
SPDK_DEBUGLOG(nvmf_vfio,
|
|
"%s: UNMAP Admin queue\n",
|
|
ctrlr_id(qpair->ctrlr));
|
|
unmap_admin_queue(qpair->ctrlr);
|
|
qpair->state = VFIO_USER_QPAIR_INACTIVE;
|
|
}
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
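
/*
 * Editorial note (a rough summary, not part of the original source): the CC
 * handling above reacts to the interesting controller-state writes roughly
 * as follows, assuming no other bits change:
 *
 *	CC.EN == 1, CC.SHN == 0  : controller enable   -> map ASQ/ACQ, ACTIVE
 *	CC.EN == 0, CC.SHN == 0  : controller disable  -> unmap ASQ/ACQ, INACTIVE
 *	CC.EN == 1, CC.SHN != 0  : shutdown requested  -> unmap ASQ/ACQ, INACTIVE
 *
 * The property write itself is applied by the generic NVMf layer; this
 * callback only mirrors the result onto the vfio-user admin queue mappings.
 */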
|
|
|
|
/*
|
|
* XXX Do NOT remove, see comment in access_bar0_fn.
|
|
*
|
|
* Handles a write at offset 0x1000 or more.
|
|
*
|
|
* DSTRD is set to fixed value 0 for NVMf.
|
|
*
|
|
*/
|
|
static int
|
|
handle_dbl_access(struct nvmf_vfio_user_ctrlr *ctrlr, uint32_t *buf,
|
|
const size_t count, loff_t pos, const bool is_write)
|
|
{
|
|
assert(ctrlr != NULL);
|
|
assert(buf != NULL);
|
|
|
|
if (count != sizeof(uint32_t)) {
|
|
SPDK_ERRLOG("%s: bad doorbell buffer size %ld\n",
|
|
ctrlr_id(ctrlr), count);
|
|
return -EINVAL;
|
|
}
|
|
|
|
pos -= NVMF_VFIO_USER_DOORBELLS_OFFSET;
|
|
|
|
/* pos must be dword aligned */
|
|
if ((pos & 0x3) != 0) {
|
|
SPDK_ERRLOG("%s: bad doorbell offset %#lx\n", ctrlr_id(ctrlr), pos);
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* convert byte offset to array index */
|
|
pos >>= 2;
|
|
|
|
if (pos > NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR * 2) {
|
|
/*
|
|
* TODO: need to emit a "Write to Invalid Doorbell Register"
|
|
* asynchronous event
|
|
*/
|
|
SPDK_ERRLOG("%s: bad doorbell index %#lx\n", ctrlr_id(ctrlr), pos);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (is_write) {
|
|
ctrlr->doorbells[pos] = *buf;
|
|
spdk_wmb();
|
|
} else {
|
|
spdk_rmb();
|
|
*buf = ctrlr->doorbells[pos];
|
|
}
|
|
return 0;
|
|
}
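
/*
 * Editorial worked example (not part of the original source): a 4-byte
 * doorbell write at BAR0 offset 0x1008 arrives here with pos = 0x1008.
 * After subtracting NVMF_VFIO_USER_DOORBELLS_OFFSET and shifting right by
 * two, the index is 2, which with the (qid * 2) + is_cq layout is the SQ
 * tail doorbell of queue pair 1; index 3 (offset 0x100c) would be the
 * matching CQ head doorbell.
 */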
|
|
|
|
static ssize_t
|
|
access_bar0_fn(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t pos,
|
|
bool is_write)
|
|
{
|
|
struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx);
|
|
struct nvmf_vfio_user_ctrlr *ctrlr;
|
|
struct nvmf_vfio_user_req *req;
|
|
int ret;
|
|
|
|
ctrlr = endpoint->ctrlr;
|
|
|
|
SPDK_DEBUGLOG(nvmf_vfio,
|
|
"%s: bar0 %s ctrlr: %p, count=%zu, pos=%"PRIX64"\n",
|
|
endpoint_id(endpoint), is_write ? "write" : "read",
|
|
ctrlr, count, pos);
|
|
|
|
if (pos >= NVMF_VFIO_USER_DOORBELLS_OFFSET) {
|
|
/*
|
|
* XXX The fact that the doorbells can be memory mapped doesn't
|
|
* mean that the client (VFIO in QEMU) is obliged to memory
|
|
* map them, it might still elect to access them via regular
|
|
* read/write.
|
|
*/
|
|
ret = handle_dbl_access(ctrlr, (uint32_t *)buf, count,
|
|
pos, is_write);
|
|
if (ret == 0) {
|
|
return count;
|
|
}
|
|
assert(ret < 0);
|
|
return ret;
|
|
}
|
|
|
|
/* Construct a Fabric Property Get/Set command and send it */
|
|
req = get_nvmf_vfio_user_req(ctrlr->qp[0]);
|
|
if (req == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
req->cb_fn = nvmf_vfio_user_prop_req_rsp;
|
|
req->cb_arg = ctrlr->qp[0];
|
|
req->req.cmd->prop_set_cmd.opcode = SPDK_NVME_OPC_FABRIC;
|
|
req->req.cmd->prop_set_cmd.cid = 0;
|
|
req->req.cmd->prop_set_cmd.attrib.size = (count / 4) - 1;
|
|
req->req.cmd->prop_set_cmd.ofst = pos;
|
|
if (is_write) {
|
|
req->req.cmd->prop_set_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET;
|
|
if (req->req.cmd->prop_set_cmd.attrib.size) {
|
|
req->req.cmd->prop_set_cmd.value.u64 = *(uint64_t *)buf;
|
|
} else {
|
|
req->req.cmd->prop_set_cmd.value.u32.high = 0;
|
|
req->req.cmd->prop_set_cmd.value.u32.low = *(uint32_t *)buf;
|
|
}
|
|
} else {
|
|
req->req.cmd->prop_get_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET;
|
|
}
|
|
req->req.length = count;
|
|
req->req.data = buf;
|
|
|
|
spdk_nvmf_request_exec_fabrics(&req->req);
|
|
|
|
return count;
|
|
}
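
/*
 * Editorial note (an illustration, not part of the original source): register
 * accesses below the doorbell window are translated into NVMe-oF Property
 * Get/Set fabrics commands: the BAR0 byte offset becomes the property offset
 * (ofst) and attrib.size encodes the access width as (count / 4) - 1, i.e. 0
 * for a 4-byte and 1 for an 8-byte access. For example, a guest read of CSTS
 * (offset 0x1c, 4 bytes) becomes a Property Get with ofst = 0x1c and
 * attrib.size = 0, and the value returned by the NVMf layer is copied back
 * into the access buffer in nvmf_vfio_user_prop_req_rsp().
 */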
|
|
|
|
/*
|
|
* NVMe driver reads 4096 bytes, which is the extended PCI configuration space
|
|
* available on PCI-X 2.0 and PCI Express buses
|
|
*/
|
|
static ssize_t
|
|
access_pci_config(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t offset,
|
|
bool is_write)
|
|
{
|
|
struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx);
|
|
|
|
if (is_write) {
|
|
SPDK_ERRLOG("%s: write %#lx-%#lx not supported\n",
|
|
endpoint_id(endpoint), offset, offset + count);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (offset + count > PCI_CFG_SPACE_EXP_SIZE) {
|
|
SPDK_ERRLOG("%s: access past end of extended PCI configuration space, want=%ld+%ld, max=%d\n",
|
|
endpoint_id(endpoint), offset, count,
|
|
PCI_CFG_SPACE_EXP_SIZE);
|
|
return -ERANGE;
|
|
}
|
|
|
|
memcpy(buf, ((unsigned char *)endpoint->pci_config_space) + offset, count);
|
|
|
|
return count;
|
|
}
|
|
|
|
static void
|
|
vfio_user_log(vfu_ctx_t *vfu_ctx, int level, char const *msg)
|
|
{
|
|
struct nvmf_vfio_user_endpoint *endpoint = vfu_get_private(vfu_ctx);
|
|
|
|
if (level >= LOG_DEBUG) {
|
|
SPDK_DEBUGLOG(nvmf_vfio, "%s: %s\n", endpoint_id(endpoint), msg);
|
|
} else if (level >= LOG_INFO) {
|
|
SPDK_INFOLOG(nvmf_vfio, "%s: %s\n", endpoint_id(endpoint), msg);
|
|
} else if (level >= LOG_NOTICE) {
|
|
SPDK_NOTICELOG("%s: %s\n", endpoint_id(endpoint), msg);
|
|
} else if (level >= LOG_WARNING) {
|
|
SPDK_WARNLOG("%s: %s\n", endpoint_id(endpoint), msg);
|
|
} else {
|
|
SPDK_ERRLOG("%s: %s\n", endpoint_id(endpoint), msg);
|
|
}
|
|
}
|
|
|
|
static void
|
|
init_pci_config_space(vfu_pci_config_space_t *p)
|
|
{
|
|
/* MLBAR */
|
|
p->hdr.bars[0].raw = 0x0;
|
|
/* MUBAR */
|
|
p->hdr.bars[1].raw = 0x0;
|
|
|
|
/* vendor specific, let's set them to zero for now */
|
|
p->hdr.bars[3].raw = 0x0;
|
|
p->hdr.bars[4].raw = 0x0;
|
|
p->hdr.bars[5].raw = 0x0;
|
|
|
|
/* enable INTx */
|
|
p->hdr.intr.ipin = 0x1;
|
|
}
|
|
|
|
static int
|
|
vfio_user_dev_info_fill(struct nvmf_vfio_user_endpoint *endpoint)
|
|
{
|
|
int ret;
|
|
ssize_t cap_offset;
|
|
vfu_ctx_t *vfu_ctx = endpoint->vfu_ctx;
|
|
|
|
struct pmcap pmcap = { .hdr.id = PCI_CAP_ID_PM, .pmcs.nsfrst = 0x1 };
|
|
struct pxcap pxcap = {
|
|
.hdr.id = PCI_CAP_ID_EXP,
|
|
.pxcaps.ver = 0x2,
|
|
.pxdcap = {.per = 0x1, .flrc = 0x1},
|
|
.pxdcap2.ctds = 0x1
|
|
};
|
|
|
|
struct msixcap msixcap = {
|
|
.hdr.id = PCI_CAP_ID_MSIX,
|
|
.mxc.ts = NVME_IRQ_MSIX_NUM - 1,
|
|
.mtab = {.tbir = 0x4, .to = 0x0},
|
|
.mpba = {.pbir = 0x5, .pbao = 0x0}
|
|
};
|
|
|
|
static struct iovec sparse_mmap[] = {
|
|
{
|
|
.iov_base = (void *)NVMF_VFIO_USER_DOORBELLS_OFFSET,
|
|
.iov_len = NVMF_VFIO_USER_DOORBELLS_SIZE,
|
|
},
|
|
};
|
|
|
|
ret = vfu_pci_init(vfu_ctx, VFU_PCI_TYPE_EXPRESS, PCI_HEADER_TYPE_NORMAL, 0);
|
|
if (ret < 0) {
|
|
SPDK_ERRLOG("vfu_ctx %p failed to initialize PCI\n", vfu_ctx);
|
|
return ret;
|
|
}
|
|
vfu_pci_set_id(vfu_ctx, 0x4e58, 0x0001, 0, 0);
|
|
/*
|
|
* 0x02, controller uses the NVM Express programming interface
|
|
* 0x08, non-volatile memory controller
|
|
* 0x01, mass storage controller
|
|
*/
|
|
vfu_pci_set_class(vfu_ctx, 0x01, 0x08, 0x02);
|
|
|
|
cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &pmcap);
|
|
if (cap_offset < 0) {
|
|
SPDK_ERRLOG("vfu_ctx %p failed add pmcap\n", vfu_ctx);
|
|
return ret;
|
|
}
|
|
|
|
cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &pxcap);
|
|
if (cap_offset < 0) {
|
|
SPDK_ERRLOG("vfu_ctx %p failed add pxcap\n", vfu_ctx);
|
|
return ret;
|
|
}
|
|
|
|
cap_offset = vfu_pci_add_capability(vfu_ctx, 0, 0, &msixcap);
|
|
if (cap_offset < 0) {
|
|
SPDK_ERRLOG("vfu_ctx %p failed add msixcap\n", vfu_ctx);
|
|
return ret;
|
|
}
|
|
|
|
ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX, NVME_REG_CFG_SIZE,
|
|
access_pci_config, VFU_REGION_FLAG_RW, NULL, 0, -1);
|
|
if (ret < 0) {
|
|
SPDK_ERRLOG("vfu_ctx %p failed to setup cfg\n", vfu_ctx);
|
|
return ret;
|
|
}
|
|
|
|
ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, NVME_REG_BAR0_SIZE,
|
|
access_bar0_fn, VFU_REGION_FLAG_RW | VFU_REGION_FLAG_MEM,
|
|
sparse_mmap, 1, endpoint->fd);
|
|
if (ret < 0) {
|
|
SPDK_ERRLOG("vfu_ctx %p failed to setup bar 0\n", vfu_ctx);
|
|
return ret;
|
|
}
|
|
|
|
ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR4_REGION_IDX, PAGE_SIZE,
|
|
NULL, VFU_REGION_FLAG_RW, NULL, 0, -1);
|
|
if (ret < 0) {
|
|
SPDK_ERRLOG("vfu_ctx %p failed to setup bar 4\n", vfu_ctx);
|
|
return ret;
|
|
}
|
|
|
|
ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR5_REGION_IDX, PAGE_SIZE,
|
|
NULL, VFU_REGION_FLAG_RW, NULL, 0, -1);
|
|
if (ret < 0) {
|
|
SPDK_ERRLOG("vfu_ctx %p failed to setup bar 5\n", vfu_ctx);
|
|
return ret;
|
|
}
|
|
|
|
ret = vfu_setup_device_dma_cb(vfu_ctx, memory_region_add_cb, memory_region_remove_cb);
|
|
if (ret < 0) {
|
|
SPDK_ERRLOG("vfu_ctx %p failed to setup dma callback\n", vfu_ctx);
|
|
return ret;
|
|
}
|
|
|
|
ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1);
|
|
if (ret < 0) {
|
|
SPDK_ERRLOG("vfu_ctx %p failed to setup INTX\n", vfu_ctx);
|
|
return ret;
|
|
}
|
|
|
|
ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ, NVME_IRQ_MSIX_NUM);
|
|
if (ret < 0) {
|
|
SPDK_ERRLOG("vfu_ctx %p failed to setup MSIX\n", vfu_ctx);
|
|
return ret;
|
|
}
|
|
|
|
ret = vfu_realize_ctx(vfu_ctx);
|
|
if (ret < 0) {
|
|
SPDK_ERRLOG("vfu_ctx %p failed to realize\n", vfu_ctx);
|
|
return ret;
|
|
}
|
|
|
|
endpoint->pci_config_space = vfu_pci_get_config_space(endpoint->vfu_ctx);
|
|
assert(endpoint->pci_config_space != NULL);
|
|
init_pci_config_space(endpoint->pci_config_space);
|
|
|
|
assert(cap_offset != 0);
|
|
endpoint->msix = (struct msixcap *)((uint8_t *)endpoint->pci_config_space + cap_offset);
|
|
|
|
return 0;
|
|
}
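
/*
 * Editorial note (an illustration, not part of the original source): BAR0 is
 * registered above with a single sparse-mmap entry covering only the doorbell
 * window [0x1000, 0x2000), backed by endpoint->fd. A vfio-user client may
 * therefore mmap the doorbells and write them without a socket round trip,
 * while accesses to the first 0x1000 bytes (the NVMe register block) are
 * always trapped and delivered to access_bar0_fn(). BAR4 and BAR5 are set up
 * to back the MSI-X table and PBA locations declared in msixcap (tbir = 4,
 * pbir = 5).
 */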
|
|
|
|
static void
|
|
_destroy_ctrlr(void *ctx)
|
|
{
|
|
struct nvmf_vfio_user_ctrlr *ctrlr = ctx;
|
|
int i;
|
|
|
|
for (i = 0; i < NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR; i++) {
|
|
destroy_qp(ctrlr, i);
|
|
}
|
|
|
|
if (ctrlr->endpoint) {
|
|
ctrlr->endpoint->ctrlr = NULL;
|
|
}
|
|
|
|
spdk_poller_unregister(&ctrlr->mmio_poller);
|
|
free(ctrlr);
|
|
}
|
|
|
|
static int
|
|
destroy_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr)
|
|
{
|
|
assert(ctrlr != NULL);
|
|
|
|
SPDK_NOTICELOG("destroy %s\n", ctrlr_id(ctrlr));
|
|
|
|
if (ctrlr->thread == spdk_get_thread()) {
|
|
_destroy_ctrlr(ctrlr);
|
|
} else {
|
|
spdk_thread_send_msg(ctrlr->thread, _destroy_ctrlr, ctrlr);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
nvmf_vfio_user_create_ctrlr(struct nvmf_vfio_user_transport *transport,
|
|
struct nvmf_vfio_user_endpoint *endpoint)
|
|
{
|
|
struct nvmf_vfio_user_ctrlr *ctrlr;
|
|
int err;
|
|
|
|
/* First, construct a vfio-user CUSTOM transport controller */
|
|
ctrlr = calloc(1, sizeof(*ctrlr));
|
|
if (ctrlr == NULL) {
|
|
err = -ENOMEM;
|
|
goto out;
|
|
}
|
|
ctrlr->cntlid = 0xffff;
|
|
ctrlr->transport = transport;
|
|
ctrlr->endpoint = endpoint;
|
|
ctrlr->doorbells = endpoint->doorbells;
|
|
|
|
/* Then, construct an admin queue pair */
|
|
err = init_qp(ctrlr, &transport->transport, NVMF_VFIO_USER_DEFAULT_AQ_DEPTH, 0);
|
|
if (err != 0) {
|
|
goto out;
|
|
}
|
|
endpoint->ctrlr = ctrlr;
|
|
ctrlr->ready = true;
|
|
|
|
/* Notify the generic layer about the new admin queue pair */
|
|
TAILQ_INSERT_TAIL(&ctrlr->transport->new_qps, ctrlr->qp[0], link);
|
|
|
|
out:
|
|
if (err != 0) {
|
|
SPDK_ERRLOG("%s: failed to create vfio-user controller: %s\n",
|
|
endpoint_id(endpoint), strerror(-err));
|
|
if (destroy_ctrlr(ctrlr) != 0) {
|
|
SPDK_ERRLOG("%s: failed to clean up\n",
|
|
endpoint_id(endpoint));
|
|
}
|
|
}
|
|
}
|
|
|
|
static int
|
|
nvmf_vfio_user_listen(struct spdk_nvmf_transport *transport,
|
|
const struct spdk_nvme_transport_id *trid,
|
|
struct spdk_nvmf_listen_opts *listen_opts)
|
|
{
|
|
struct nvmf_vfio_user_transport *vu_transport;
|
|
struct nvmf_vfio_user_endpoint *endpoint, *tmp;
|
|
char *path = NULL;
|
|
char uuid[PATH_MAX] = {};
|
|
int fd;
|
|
int err;
|
|
|
|
vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport,
|
|
transport);
|
|
|
|
TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) {
|
|
/* Only compare traddr */
|
|
if (strncmp(endpoint->trid.traddr, trid->traddr, sizeof(endpoint->trid.traddr)) == 0) {
|
|
return -EEXIST;
|
|
}
|
|
}
|
|
|
|
endpoint = calloc(1, sizeof(*endpoint));
|
|
if (!endpoint) {
|
|
return -ENOMEM;
|
|
}
|
|
|
|
endpoint->fd = -1;
|
|
memcpy(&endpoint->trid, trid, sizeof(endpoint->trid));
|
|
|
|
err = asprintf(&path, "%s/bar0", endpoint_id(endpoint));
|
|
if (err == -1) {
|
|
goto out;
|
|
}
|
|
|
|
fd = open(path, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
|
|
if (fd == -1) {
|
|
SPDK_ERRLOG("%s: failed to open device memory at %s: %m\n",
|
|
endpoint_id(endpoint), path);
|
|
err = fd;
|
|
free(path);
|
|
goto out;
|
|
}
|
|
free(path);
|
|
|
|
err = ftruncate(fd, NVMF_VFIO_USER_DOORBELLS_OFFSET + NVMF_VFIO_USER_DOORBELLS_SIZE);
|
|
if (err != 0) {
|
|
goto out;
|
|
}
|
|
|
|
endpoint->doorbells = mmap(NULL, NVMF_VFIO_USER_DOORBELLS_SIZE,
|
|
PROT_READ | PROT_WRITE, MAP_SHARED, fd, NVMF_VFIO_USER_DOORBELLS_OFFSET);
|
|
if (endpoint->doorbells == MAP_FAILED) {
|
|
endpoint->doorbells = NULL;
|
|
err = -errno;
|
|
goto out;
|
|
}
|
|
|
|
endpoint->fd = fd;
|
|
|
|
snprintf(uuid, PATH_MAX, "%s/cntrl", endpoint_id(endpoint));
|
|
|
|
endpoint->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, uuid, LIBVFIO_USER_FLAG_ATTACH_NB,
|
|
endpoint, VFU_DEV_TYPE_PCI);
|
|
if (endpoint->vfu_ctx == NULL) {
|
|
SPDK_ERRLOG("%s: error creating libmuser context: %m\n",
|
|
endpoint_id(endpoint));
|
|
err = -1;
|
|
goto out;
|
|
}
|
|
vfu_setup_log(endpoint->vfu_ctx, vfio_user_log,
|
|
SPDK_DEBUGLOG_FLAG_ENABLED("nvmf_vfio") ? LOG_DEBUG : LOG_ERR);
|
|
|
|
err = vfio_user_dev_info_fill(endpoint);
|
|
if (err < 0) {
|
|
goto out;
|
|
}
|
|
|
|
pthread_mutex_init(&endpoint->lock, NULL);
|
|
TAILQ_INSERT_TAIL(&vu_transport->endpoints, endpoint, link);
|
|
SPDK_NOTICELOG("%s: doorbells %p\n", uuid, endpoint->doorbells);
|
|
|
|
out:
|
|
if (err != 0) {
|
|
nvmf_vfio_user_destroy_endpoint(endpoint);
|
|
}
|
|
|
|
return err;
|
|
}
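
/*
 * Editorial note (a minimal sketch, not part of the original source): the
 * doorbells are backed by a plain file ("<traddr>/bar0") so that this process
 * and the vfio-user client can share the same pages: the target mmaps the
 * file here, and the client can mmap the same fd via the sparse-mmap region
 * set up in vfio_user_dev_info_fill(). The sharing itself is ordinary POSIX
 * shared file mapping, roughly:
 *
 *	int fd = open(path, O_RDWR | O_CREAT, 0666);
 *	ftruncate(fd, 0x2000);
 *	uint32_t *db = mmap(NULL, 0x1000, PROT_READ | PROT_WRITE,
 *			    MAP_SHARED, fd, 0x1000);
 *	// stores through db are visible to every other MAP_SHARED mapping
 */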
|
|
|
|
static void
|
|
nvmf_vfio_user_stop_listen(struct spdk_nvmf_transport *transport,
|
|
const struct spdk_nvme_transport_id *trid)
|
|
{
|
|
struct nvmf_vfio_user_transport *vu_transport;
|
|
struct nvmf_vfio_user_endpoint *endpoint, *tmp;
|
|
int err;
|
|
|
|
assert(trid != NULL);
|
|
assert(trid->traddr != NULL);
|
|
|
|
SPDK_DEBUGLOG(nvmf_vfio, "%s: stop listen\n", trid->traddr);
|
|
|
|
vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport,
|
|
transport);
|
|
|
|
pthread_mutex_lock(&vu_transport->lock);
|
|
TAILQ_FOREACH_SAFE(endpoint, &vu_transport->endpoints, link, tmp) {
|
|
if (strcmp(trid->traddr, endpoint->trid.traddr) == 0) {
|
|
TAILQ_REMOVE(&vu_transport->endpoints, endpoint, link);
|
|
if (endpoint->ctrlr) {
|
|
err = destroy_ctrlr(endpoint->ctrlr);
|
|
if (err != 0) {
|
|
SPDK_ERRLOG("%s: failed destroy controller: %s\n",
|
|
endpoint_id(endpoint), strerror(-err));
|
|
}
|
|
}
|
|
nvmf_vfio_user_destroy_endpoint(endpoint);
|
|
pthread_mutex_unlock(&vu_transport->lock);
|
|
|
|
return;
|
|
}
|
|
}
|
|
pthread_mutex_unlock(&vu_transport->lock);
|
|
|
|
SPDK_DEBUGLOG(nvmf_vfio, "%s: not found\n", trid->traddr);
|
|
}
|
|
|
|
static void
|
|
nvmf_vfio_user_cdata_init(struct spdk_nvmf_transport *transport,
|
|
struct spdk_nvmf_subsystem *subsystem,
|
|
struct spdk_nvmf_ctrlr_data *cdata)
|
|
{
|
|
cdata->sgls.supported = SPDK_NVME_SGLS_NOT_SUPPORTED;
|
|
}
|
|
|
|
static int
|
|
nvmf_vfio_user_listen_associate(struct spdk_nvmf_transport *transport,
|
|
const struct spdk_nvmf_subsystem *subsystem,
|
|
const struct spdk_nvme_transport_id *trid)
|
|
{
|
|
struct nvmf_vfio_user_transport *vu_transport;
|
|
struct nvmf_vfio_user_endpoint *endpoint;
|
|
|
|
vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport, transport);
|
|
|
|
TAILQ_FOREACH(endpoint, &vu_transport->endpoints, link) {
|
|
if (strncmp(endpoint->trid.traddr, trid->traddr, sizeof(endpoint->trid.traddr)) == 0) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (endpoint == NULL) {
|
|
return -ENOENT;
|
|
}
|
|
|
|
endpoint->subsystem = subsystem;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Executed periodically.
|
|
*
|
|
* XXX SPDK thread context.
|
|
*/
|
|
static uint32_t
|
|
nvmf_vfio_user_accept(struct spdk_nvmf_transport *transport)
|
|
{
|
|
int err;
|
|
struct nvmf_vfio_user_transport *vu_transport;
|
|
struct nvmf_vfio_user_qpair *qp, *tmp_qp;
|
|
struct nvmf_vfio_user_endpoint *endpoint;
|
|
|
|
vu_transport = SPDK_CONTAINEROF(transport, struct nvmf_vfio_user_transport,
|
|
transport);
|
|
|
|
pthread_mutex_lock(&vu_transport->lock);
|
|
|
|
TAILQ_FOREACH(endpoint, &vu_transport->endpoints, link) {
|
|
/* we need to try to attach the controller again after reset or shutdown */
|
|
if (endpoint->ctrlr != NULL && endpoint->ctrlr->ready) {
|
|
continue;
|
|
}
|
|
|
|
err = vfu_attach_ctx(endpoint->vfu_ctx);
|
|
if (err != 0) {
|
|
if (errno == EAGAIN || errno == EWOULDBLOCK) {
|
|
continue;
|
|
}
|
|
|
|
pthread_mutex_unlock(&vu_transport->lock);
|
|
return -EFAULT;
|
|
}
|
|
|
|
/* Construct a controller */
|
|
nvmf_vfio_user_create_ctrlr(vu_transport, endpoint);
|
|
}
|
|
|
|
TAILQ_FOREACH_SAFE(qp, &vu_transport->new_qps, link, tmp_qp) {
|
|
TAILQ_REMOVE(&vu_transport->new_qps, qp, link);
|
|
spdk_nvmf_tgt_new_qpair(transport->tgt, &qp->qpair);
|
|
}
|
|
|
|
pthread_mutex_unlock(&vu_transport->lock);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
nvmf_vfio_user_discover(struct spdk_nvmf_transport *transport,
|
|
struct spdk_nvme_transport_id *trid,
|
|
struct spdk_nvmf_discovery_log_page_entry *entry)
|
|
{ }
|
|
|
|
static struct spdk_nvmf_transport_poll_group *
|
|
nvmf_vfio_user_poll_group_create(struct spdk_nvmf_transport *transport)
|
|
{
|
|
struct nvmf_vfio_user_poll_group *vu_group;
|
|
|
|
SPDK_DEBUGLOG(nvmf_vfio, "create poll group\n");
|
|
|
|
vu_group = calloc(1, sizeof(*vu_group));
|
|
if (vu_group == NULL) {
|
|
SPDK_ERRLOG("Error allocating poll group: %m");
|
|
return NULL;
|
|
}
|
|
|
|
TAILQ_INIT(&vu_group->qps);
|
|
|
|
return &vu_group->group;
|
|
}
|
|
|
|
/* called when process exits */
|
|
static void
|
|
nvmf_vfio_user_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
|
|
{
|
|
struct nvmf_vfio_user_poll_group *vu_group;
|
|
|
|
SPDK_DEBUGLOG(nvmf_vfio, "destroy poll group\n");
|
|
|
|
vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group);
|
|
|
|
free(vu_group);
|
|
}
|
|
|
|
static void
|
|
vfio_user_qpair_disconnect_cb(void *ctx)
|
|
{
|
|
struct nvmf_vfio_user_endpoint *endpoint = ctx;
|
|
struct nvmf_vfio_user_ctrlr *ctrlr;
|
|
|
|
pthread_mutex_lock(&endpoint->lock);
|
|
ctrlr = endpoint->ctrlr;
|
|
if (!ctrlr) {
|
|
pthread_mutex_unlock(&endpoint->lock);
|
|
return;
|
|
}
|
|
|
|
if (!ctrlr->num_connected_qps) {
|
|
destroy_ctrlr(ctrlr);
|
|
pthread_mutex_unlock(&endpoint->lock);
|
|
return;
|
|
}
|
|
pthread_mutex_unlock(&endpoint->lock);
|
|
}
|
|
|
|
static int
|
|
vfio_user_stop_ctrlr(struct nvmf_vfio_user_ctrlr *ctrlr)
|
|
{
|
|
uint32_t i;
|
|
struct nvmf_vfio_user_qpair *qpair;
|
|
struct nvmf_vfio_user_endpoint *endpoint;
|
|
|
|
SPDK_DEBUGLOG(nvmf_vfio, "%s stop processing\n", ctrlr_id(ctrlr));
|
|
|
|
ctrlr->ready = false;
|
|
endpoint = ctrlr->endpoint;
|
|
assert(endpoint != NULL);
|
|
|
|
for (i = 0; i < NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR; i++) {
|
|
qpair = ctrlr->qp[i];
|
|
if (qpair == NULL) {
|
|
continue;
|
|
}
|
|
spdk_nvmf_qpair_disconnect(&qpair->qpair, vfio_user_qpair_disconnect_cb, endpoint);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
vfio_user_poll_mmio(void *ctx)
|
|
{
|
|
struct nvmf_vfio_user_ctrlr *ctrlr = ctx;
|
|
int ret;
|
|
|
|
assert(ctrlr != NULL);
|
|
|
|
/* This will call access_bar0_fn() if there are any writes
|
|
* to the portion of the BAR that is not mmap'd */
|
|
ret = vfu_run_ctx(ctrlr->endpoint->vfu_ctx);
|
|
if (spdk_unlikely(ret != 0)) {
|
|
spdk_poller_unregister(&ctrlr->mmio_poller);
|
|
|
|
/* initiator shutdown or reset, waiting for another re-connect */
|
|
if (errno == ENOTCONN) {
|
|
vfio_user_stop_ctrlr(ctrlr);
|
|
return SPDK_POLLER_BUSY;
|
|
}
|
|
|
|
fail_ctrlr(ctrlr);
|
|
}
|
|
|
|
return SPDK_POLLER_BUSY;
|
|
}
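
/*
 * Editorial note (an illustration, not part of the original source): this is
 * the dedicated MMIO poller added by this patch. vfu_run_ctx() services any
 * pending vfio-user requests on the socket (trapped region accesses such as
 * non-mmap'd BAR0 reads and writes, DMA map/unmap messages, IRQ setup), so
 * MMIO handling no longer depends on the admin queue's poll group being
 * polled. It is registered on the admin queue's SPDK thread in
 * handle_queue_connect_rsp() below, via
 * SPDK_POLLER_REGISTER(vfio_user_poll_mmio, ctrlr, 0). A failure with
 * errno == ENOTCONN means the client disconnected (reset or shutdown), so the
 * controller is stopped and nvmf_vfio_user_accept() waits for a reconnect;
 * any other failure is treated as fatal via fail_ctrlr().
 */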
|
|
|
|
static int
|
|
handle_queue_connect_rsp(struct nvmf_vfio_user_req *req, void *cb_arg)
|
|
{
|
|
struct nvmf_vfio_user_poll_group *vu_group;
|
|
struct nvmf_vfio_user_qpair *qpair = cb_arg;
|
|
struct nvmf_vfio_user_ctrlr *ctrlr;
|
|
struct nvmf_vfio_user_endpoint *endpoint;
|
|
|
|
assert(qpair != NULL);
|
|
assert(req != NULL);
|
|
|
|
ctrlr = qpair->ctrlr;
|
|
endpoint = ctrlr->endpoint;
|
|
assert(ctrlr != NULL);
|
|
assert(endpoint != NULL);
|
|
|
|
if (spdk_nvme_cpl_is_error(&req->req.rsp->nvme_cpl)) {
|
|
SPDK_ERRLOG("SC %u, SCT %u\n", req->req.rsp->nvme_cpl.status.sc, req->req.rsp->nvme_cpl.status.sct);
|
|
destroy_qp(ctrlr, qpair->qpair.qid);
|
|
destroy_ctrlr(ctrlr);
|
|
return -1;
|
|
}
|
|
|
|
vu_group = SPDK_CONTAINEROF(qpair->group, struct nvmf_vfio_user_poll_group, group);
|
|
TAILQ_INSERT_TAIL(&vu_group->qps, qpair, link);
|
|
qpair->state = VFIO_USER_QPAIR_ACTIVE;
|
|
|
|
pthread_mutex_lock(&endpoint->lock);
|
|
if (nvmf_qpair_is_admin_queue(&qpair->qpair)) {
|
|
ctrlr->cntlid = qpair->qpair.ctrlr->cntlid;
|
|
ctrlr->thread = spdk_get_thread();
|
|
ctrlr->mmio_poller = SPDK_POLLER_REGISTER(vfio_user_poll_mmio, ctrlr, 0);
|
|
}
|
|
ctrlr->num_connected_qps++;
|
|
pthread_mutex_unlock(&endpoint->lock);
|
|
|
|
free(req->req.data);
|
|
req->req.data = NULL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Called by spdk_nvmf_transport_poll_group_add.
|
|
*/
|
|
static int
nvmf_vfio_user_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
			      struct spdk_nvmf_qpair *qpair)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_req *vu_req;
	struct nvmf_vfio_user_ctrlr *ctrlr;
	struct spdk_nvmf_request *req;
	struct spdk_nvmf_fabric_connect_data *data;
	bool admin;

	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
	vu_qpair->group = group;
	ctrlr = vu_qpair->ctrlr;

	SPDK_DEBUGLOG(nvmf_vfio, "%s: add QP%d=%p(%p) to poll_group=%p\n",
		      ctrlr_id(ctrlr), vu_qpair->qpair.qid,
		      vu_qpair, qpair, group);

	admin = nvmf_qpair_is_admin_queue(&vu_qpair->qpair);

	vu_req = get_nvmf_vfio_user_req(vu_qpair);
	if (vu_req == NULL) {
		return -1;
	}

	req = &vu_req->req;
	req->cmd->connect_cmd.opcode = SPDK_NVME_OPC_FABRIC;
	req->cmd->connect_cmd.cid = vu_req->cid;
	req->cmd->connect_cmd.fctype = SPDK_NVMF_FABRIC_COMMAND_CONNECT;
	req->cmd->connect_cmd.recfmt = 0;
	req->cmd->connect_cmd.sqsize = vu_qpair->qsize - 1;
	req->cmd->connect_cmd.qid = admin ? 0 : qpair->qid;

	req->length = sizeof(struct spdk_nvmf_fabric_connect_data);
	req->data = calloc(1, req->length);
	if (req->data == NULL) {
		nvmf_vfio_user_req_free(req);
		return -ENOMEM;
	}

	data = (struct spdk_nvmf_fabric_connect_data *)req->data;
	data->cntlid = admin ? 0xFFFF : ctrlr->cntlid;
	snprintf(data->subnqn, sizeof(data->subnqn), "%s",
		 spdk_nvmf_subsystem_get_nqn(ctrlr->endpoint->subsystem));

	vu_req->cb_fn = handle_queue_connect_rsp;
	vu_req->cb_arg = vu_qpair;

	SPDK_DEBUGLOG(nvmf_vfio,
		      "%s: sending connect fabrics command for QID=%#x cntlid=%#x\n",
		      ctrlr_id(ctrlr), qpair->qid, data->cntlid);

	spdk_nvmf_request_exec_fabrics(req);
	return 0;
}

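/*
 * Called by spdk_nvmf_transport_poll_group_remove: unlinks the queue pair
 * from the poll group and drops the controller's connected-queue count under
 * the endpoint lock.
 */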
static int
nvmf_vfio_user_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
				 struct spdk_nvmf_qpair *qpair)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_ctrlr *vu_ctrlr;
	struct nvmf_vfio_user_endpoint *endpoint;
	struct nvmf_vfio_user_poll_group *vu_group;

	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
	vu_ctrlr = vu_qpair->ctrlr;
	endpoint = vu_ctrlr->endpoint;

	SPDK_DEBUGLOG(nvmf_vfio,
		      "%s: remove NVMf QP%d=%p from NVMf poll_group=%p\n",
		      ctrlr_id(vu_qpair->ctrlr), qpair->qid, qpair, group);

	vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group);
	TAILQ_REMOVE(&vu_group->qps, vu_qpair, link);

	pthread_mutex_lock(&endpoint->lock);
	assert(vu_ctrlr->num_connected_qps);
	vu_ctrlr->num_connected_qps--;
	pthread_mutex_unlock(&endpoint->lock);

	return 0;
}

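/* Transport .req_free callback: return the request to its queue pair's free list. */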
static int
nvmf_vfio_user_req_free(struct spdk_nvmf_request *req)
{
	struct nvmf_vfio_user_qpair *qpair;
	struct nvmf_vfio_user_req *vfio_user_req;

	assert(req != NULL);

	vfio_user_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
	qpair = SPDK_CONTAINEROF(vfio_user_req->req.qpair, struct nvmf_vfio_user_qpair, qpair);

	TAILQ_INSERT_TAIL(&qpair->reqs, vfio_user_req, link);

	return 0;
}

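/*
 * Transport .req_complete callback: run the per-request callback set at
 * submission time (handle_cmd_rsp for guest commands, handle_queue_connect_rsp
 * for the internal CONNECT), fail the controller if that callback reports an
 * error, then recycle the request.
 */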
static int
nvmf_vfio_user_req_complete(struct spdk_nvmf_request *req)
{
	struct nvmf_vfio_user_qpair *qpair;
	struct nvmf_vfio_user_req *vfio_user_req;

	assert(req != NULL);

	vfio_user_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
	qpair = SPDK_CONTAINEROF(vfio_user_req->req.qpair, struct nvmf_vfio_user_qpair, qpair);

	if (vfio_user_req->cb_fn != NULL) {
		if (vfio_user_req->cb_fn(vfio_user_req, vfio_user_req->cb_arg) != 0) {
			fail_ctrlr(qpair->ctrlr);
		}
	}

	TAILQ_INSERT_TAIL(&qpair->reqs, vfio_user_req, link);

	return 0;
}

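/*
 * Transport .qpair_fini callback: tear down the queue pair's resources and
 * notify the caller once that is done.
 */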
static void
nvmf_vfio_user_close_qpair(struct spdk_nvmf_qpair *qpair,
			   spdk_nvmf_transport_qpair_fini_cb cb_fn, void *cb_arg)
{
	struct nvmf_vfio_user_qpair *vu_qpair;

	assert(qpair != NULL);
	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
	destroy_qp(vu_qpair->ctrlr, qpair->qid);

	if (cb_fn) {
		cb_fn(cb_arg);
	}
}

/**
 * Returns a preallocated spdk_nvmf_request or NULL if there isn't one available.
 */
static struct nvmf_vfio_user_req *
get_nvmf_vfio_user_req(struct nvmf_vfio_user_qpair *qpair)
{
	struct nvmf_vfio_user_req *req;

	assert(qpair != NULL);

	if (TAILQ_EMPTY(&qpair->reqs)) {
		return NULL;
	}

	req = TAILQ_FIRST(&qpair->reqs);
	TAILQ_REMOVE(&qpair->reqs, req, link);
	memset(&req->cmd, 0, sizeof(req->cmd));
	memset(&req->rsp, 0, sizeof(req->rsp));
	req->iovcnt = 0;

	return req;
}

static struct spdk_nvmf_request *
get_nvmf_req(struct nvmf_vfio_user_qpair *qpair)
{
	struct nvmf_vfio_user_req *req = get_nvmf_vfio_user_req(qpair);

	if (req == NULL) {
		return NULL;
	}
	return &req->req;
}

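/*
 * Computes the data transfer length of an I/O command. Dataset Management
 * transfers (NR + 1) range descriptors, i.e. (nr + 1) *
 * sizeof(struct spdk_nvme_dsm_range) bytes; everything else transfers
 * (NLB + 1) logical blocks, with NLB taken from the low 16 bits of CDW12.
 * For example, NLB = 7 on a 512-byte-block namespace yields 8 * 512 = 4096
 * bytes.
 */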
static int
get_nvmf_io_req_length(struct spdk_nvmf_request *req)
{
	uint16_t nlb, nr;
	uint32_t nsid;
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
	struct spdk_nvmf_ns *ns;

	nsid = cmd->nsid;
	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
	if (ns == NULL || ns->bdev == NULL) {
		SPDK_ERRLOG("unsuccessful query for nsid %u\n", cmd->nsid);
		return -EINVAL;
	}

	if (cmd->opc == SPDK_NVME_OPC_DATASET_MANAGEMENT) {
		nr = cmd->cdw10_bits.dsm.nr + 1;
		return nr * sizeof(struct spdk_nvme_dsm_range);
	}

	nlb = (cmd->cdw12 & 0x0000ffffu) + 1;
	return nlb * spdk_bdev_get_block_size(ns->bdev);
}

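/*
 * Maps the guest buffer of an admin command that carries data. Only Identify
 * (a fixed 4KiB payload) and Get Log Page (length derived from NUMDL) are
 * mapped here; other admin commands are passed through without a data buffer.
 */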
static int
map_admin_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	uint32_t len = 0;
	int iovcnt;

	req->xfer = cmd->opc & 0x3;
	req->length = 0;
	req->data = NULL;

	switch (cmd->opc) {
	case SPDK_NVME_OPC_IDENTIFY:
		len = 4096; /* TODO: there should be a define somewhere for this */
		break;
	case SPDK_NVME_OPC_GET_LOG_PAGE:
		len = (cmd->cdw10_bits.get_log_page.numdl + 1) * 4;
		break;
	}

	if (!cmd->dptr.prp.prp1 || !len) {
		return 0;
	}

	iovcnt = vfio_user_map_prps(ctrlr, req, req->iov, len);
	if (iovcnt < 0) {
		SPDK_ERRLOG("%s: map Admin Opc %x failed\n",
			    ctrlr_id(ctrlr), cmd->opc);
		return -1;
	}

	req->length = len;
	req->data = req->iov[0].iov_base;

	return 0;
}

/*
 * Maps the data buffers of an I/O command via its PRP entries.
 *
 * Returns 0 on success and -errno on failure.
 */
static int
map_io_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvmf_request *req)
{
	int err = 0;
	struct spdk_nvme_cmd *cmd;

	assert(ctrlr != NULL);
	assert(req != NULL);

	cmd = &req->cmd->nvme_cmd;
	req->xfer = spdk_nvme_opc_get_data_transfer(cmd->opc);

	if (spdk_unlikely(req->xfer == SPDK_NVME_DATA_NONE)) {
		return 0;
	}

	/* SGLs are not supported for now, only PRPs. */
	assert(req->cmd->nvme_cmd.psdt == 0);
	err = get_nvmf_io_req_length(req);
	if (err < 0) {
		return -EINVAL;
	}

	req->length = err;
	err = vfio_user_map_prps(ctrlr, req, req->iov, req->length);
	if (err < 0) {
		SPDK_ERRLOG("%s: failed to map PRP: %d\n", ctrlr_id(ctrlr), err);
		return -EFAULT;
	}

	req->data = req->iov[0].iov_base;
	req->iovcnt = err;

	return 0;
}

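/*
 * Entry point for a single submission queue entry: copies the guest command
 * into a preallocated request, maps its buffers (admin or I/O path), and
 * hands it to the NVMf layer. If mapping fails, the command is completed
 * immediately with Internal Device Error instead of being executed.
 */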
static int
handle_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
	       struct spdk_nvmf_request *req)
{
	int err;
	struct nvmf_vfio_user_req *vfio_user_req;

	assert(ctrlr != NULL);
	assert(cmd != NULL);

	/*
	 * TODO: a NULL req means that there are no free requests available,
	 * and returning -1 will fail the controller. Theoretically this error
	 * can be avoided completely by ensuring we have as many requests as
	 * slots in the SQ, plus one for the property request.
	 */
	if (spdk_unlikely(req == NULL)) {
		return -1;
	}

	vfio_user_req = SPDK_CONTAINEROF(req, struct nvmf_vfio_user_req, req);
	vfio_user_req->cb_fn = handle_cmd_rsp;
	vfio_user_req->cb_arg = SPDK_CONTAINEROF(req->qpair, struct nvmf_vfio_user_qpair, qpair);
	req->cmd->nvme_cmd = *cmd;
	if (nvmf_qpair_is_admin_queue(req->qpair)) {
		err = map_admin_cmd_req(ctrlr, req);
	} else {
		err = map_io_cmd_req(ctrlr, req);
	}

	if (spdk_unlikely(err < 0)) {
		SPDK_ERRLOG("%s: map NVMe command opc 0x%x failed\n",
			    ctrlr_id(ctrlr), cmd->opc);
		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		return handle_cmd_rsp(vfio_user_req, vfio_user_req->cb_arg);
	}

	spdk_nvmf_request_exec(req);

	return 0;
}

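/*
 * Polls a single queue pair: compares the cached SQ head against the tail
 * doorbell written by the guest and, if new entries arrived, processes them
 * via handle_sq_tdbl_write(). Any processing error fails the controller.
 */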
static void
nvmf_vfio_user_qpair_poll(struct nvmf_vfio_user_qpair *qpair)
{
	struct nvmf_vfio_user_ctrlr *ctrlr;
	uint32_t new_tail;

	assert(qpair != NULL);

	ctrlr = qpair->ctrlr;

	new_tail = *tdbl(ctrlr, &qpair->sq);
	if (sq_head(qpair) != new_tail) {
		int err = handle_sq_tdbl_write(ctrlr, new_tail, qpair);
		if (err != 0) {
			fail_ctrlr(ctrlr);
			return;
		}
	}
}

/*
 * Called unconditionally, periodically, and very frequently from SPDK to ask
 * whether there's work to be done. This function consumes new NVMe commands
 * by looking at the submission queue tail doorbells of every active queue
 * pair in the poll group; MMIO accesses are handled separately by the
 * per-controller MMIO poller.
 */
static int
nvmf_vfio_user_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
{
	struct nvmf_vfio_user_poll_group *vu_group;
	struct nvmf_vfio_user_qpair *vu_qpair, *tmp;

	assert(group != NULL);

	spdk_rmb();

	vu_group = SPDK_CONTAINEROF(group, struct nvmf_vfio_user_poll_group, group);

	TAILQ_FOREACH_SAFE(vu_qpair, &vu_group->qps, link, tmp) {
		if (spdk_unlikely(vu_qpair->state != VFIO_USER_QPAIR_ACTIVE || !vu_qpair->sq.size)) {
			continue;
		}
		nvmf_vfio_user_qpair_poll(vu_qpair);
	}

	return 0;
}

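/*
 * For vfio-user, both the local and the listen transport IDs are simply the
 * endpoint's trid; there is no meaningful peer address, so the peer-trid
 * callback returns without filling anything in.
 */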
static int
nvmf_vfio_user_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
				    struct spdk_nvme_transport_id *trid)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_ctrlr *ctrlr;

	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
	ctrlr = vu_qpair->ctrlr;

	memcpy(trid, &ctrlr->endpoint->trid, sizeof(*trid));
	return 0;
}

static int
nvmf_vfio_user_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
				   struct spdk_nvme_transport_id *trid)
{
	return 0;
}

static int
nvmf_vfio_user_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
				     struct spdk_nvme_transport_id *trid)
{
	struct nvmf_vfio_user_qpair *vu_qpair;
	struct nvmf_vfio_user_ctrlr *ctrlr;

	vu_qpair = SPDK_CONTAINEROF(qpair, struct nvmf_vfio_user_qpair, qpair);
	ctrlr = vu_qpair->ctrlr;

	memcpy(trid, &ctrlr->endpoint->trid, sizeof(*trid));
	return 0;
}

static void
nvmf_vfio_user_opts_init(struct spdk_nvmf_transport_opts *opts)
{
	opts->max_queue_depth = NVMF_VFIO_USER_DEFAULT_MAX_QUEUE_DEPTH;
	opts->max_qpairs_per_ctrlr = NVMF_VFIO_USER_DEFAULT_MAX_QPAIRS_PER_CTRLR;
	opts->in_capsule_data_size = NVMF_VFIO_USER_DEFAULT_IN_CAPSULE_DATA_SIZE;
	opts->max_io_size = NVMF_VFIO_USER_DEFAULT_MAX_IO_SIZE;
	opts->io_unit_size = NVMF_VFIO_USER_DEFAULT_IO_UNIT_SIZE;
	opts->max_aq_depth = NVMF_VFIO_USER_DEFAULT_AQ_DEPTH;
	opts->num_shared_buffers = NVMF_VFIO_USER_DEFAULT_NUM_SHARED_BUFFERS;
	opts->buf_cache_size = NVMF_VFIO_USER_DEFAULT_BUFFER_CACHE_SIZE;
}

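/*
 * Transport operations registered with the NVMf core. As an illustrative
 * (not normative) example, the transport is typically instantiated through
 * the generic transport RPC, e.g. `scripts/rpc.py nvmf_create_transport -t
 * VFIOUSER`, which picks up the defaults from nvmf_vfio_user_opts_init()
 * unless they are overridden on the command line.
 */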
const struct spdk_nvmf_transport_ops spdk_nvmf_transport_vfio_user = {
	.name = "VFIOUSER",
	.type = SPDK_NVME_TRANSPORT_VFIOUSER,
	.opts_init = nvmf_vfio_user_opts_init,
	.create = nvmf_vfio_user_create,
	.destroy = nvmf_vfio_user_destroy,

	.listen = nvmf_vfio_user_listen,
	.stop_listen = nvmf_vfio_user_stop_listen,
	.accept = nvmf_vfio_user_accept,
	.cdata_init = nvmf_vfio_user_cdata_init,
	.listen_associate = nvmf_vfio_user_listen_associate,

	.listener_discover = nvmf_vfio_user_discover,

	.poll_group_create = nvmf_vfio_user_poll_group_create,
	.poll_group_destroy = nvmf_vfio_user_poll_group_destroy,
	.poll_group_add = nvmf_vfio_user_poll_group_add,
	.poll_group_remove = nvmf_vfio_user_poll_group_remove,
	.poll_group_poll = nvmf_vfio_user_poll_group_poll,

	.req_free = nvmf_vfio_user_req_free,
	.req_complete = nvmf_vfio_user_req_complete,

	.qpair_fini = nvmf_vfio_user_close_qpair,
	.qpair_get_local_trid = nvmf_vfio_user_qpair_get_local_trid,
	.qpair_get_peer_trid = nvmf_vfio_user_qpair_get_peer_trid,
	.qpair_get_listen_trid = nvmf_vfio_user_qpair_get_listen_trid,
};

SPDK_NVMF_TRANSPORT_REGISTER(muser, &spdk_nvmf_transport_vfio_user);
SPDK_LOG_REGISTER_COMPONENT(nvmf_vfio)