nvmf: No longer use in-capsule data buffers for CONNECT

Previously, the shared buffer pools were allocated on the
nvmf controllers. When a new connection was established,
the CONNECT command needed a 4 KiB buffer, but we didn't
know which nvmf controller the connection belonged to
until after the CONNECT command completed. So there was a
special case for CONNECT that used in-capsule data buffers
instead.

Now, the buffer pool is global and always available, so we
can use it for every request and drop the special case.

A nice side effect is that users can now run the target
with no in-capsule data buffers allocated at all.

Change-Id: I974289f646947651c58d65cf898571d80e9dee9b
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-on: https://review.gerrithub.io/374360
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Ben Walker 2017-08-15 14:55:41 -07:00
parent d4da7106b3
commit e5f6a99b61
3 changed files with 66 additions and 75 deletions
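
To make the mechanics concrete, here is a minimal, hypothetical sketch of the allocation
model described in the commit message: one transport-wide spdk_mempool that every request,
including CONNECT, draws its data buffer from. Only the spdk_mempool_* calls,
SPDK_ENV_SOCKET_ID_ANY, and SPDK_MEMPOOL_DEFAULT_CACHE_SIZE are real SPDK names; the pool
name, sizes, and wrapper functions are illustrative, not the target's actual internals.

#include "spdk/env.h"

#define NVMF_DATA_BUF_SIZE  4096   /* illustrative buffer size */
#define NVMF_DATA_BUF_COUNT 1024   /* illustrative pool depth */

static struct spdk_mempool *g_data_buf_pool;

/* Create one global pool at transport init. Because the pool is not tied to
 * any controller, a buffer can be handed out before the CONNECT command has
 * resolved which controller the queue pair belongs to. */
static int
nvmf_data_buf_pool_create(void)
{
	g_data_buf_pool = spdk_mempool_create("nvmf_data_sketch",
					      NVMF_DATA_BUF_COUNT,
					      NVMF_DATA_BUF_SIZE,
					      SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
					      SPDK_ENV_SOCKET_ID_ANY);
	return g_data_buf_pool ? 0 : -1;
}

/* Same path for CONNECT and for ordinary I/O: no in-capsule fallback. */
static void *
nvmf_request_get_data_buf(void)
{
	return spdk_mempool_get(g_data_buf_pool);
}

static void
nvmf_request_put_data_buf(void *buf)
{
	spdk_mempool_put(g_data_buf_pool, buf);
}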


@@ -31,6 +31,12 @@ The NVMe driver now recognizes the NVMe 1.3 Namespace Optimal I/O Boundary field
NVMe 1.3 devices may report an optimal I/O boundary, which the driver will take
into account when splitting I/O requests.

### NVMe-oF Target (nvmf)

The NVMe-oF target no longer requires any in-capsule data buffers to run; the
feature is now entirely optional. Previously, in-capsule data buffers of at
least 4 KiB were required.

### Environment Abstraction Layer

A new default value, SPDK_MEMPOOL_DEFAULT_CACHE_SIZE, was added to provide


@@ -63,6 +63,7 @@
#MaxQueueDepth 128
# Set the maximum in-capsule data size. Must be a multiple of 16.
# 0 is a valid choice.
#InCapsuleDataSize 4096
# Set the maximum I/O size. Must be a multiple of 4096.
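
As a hedged illustration of the new behavior (not a line from this diff): with in-capsule
data now optional, a target that wants to skip those allocations entirely would simply set
the value to zero in this section, e.g.

# Disable in-capsule data buffers entirely (now a supported configuration).
InCapsuleDataSize 0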


@@ -717,15 +717,49 @@ typedef enum _spdk_nvmf_request_prep_type {
	SPDK_NVMF_REQUEST_PREP_PENDING_DATA = 2,
} spdk_nvmf_request_prep_type;

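/*
 * This callback is registered via spdk_mem_map_alloc(0, spdk_nvmf_rdma_mem_notify,
 * device) when a listen address is added (see the spdk_nvmf_rdma_listen() hunk
 * below), so each RDMA device gets a memory map whose translations are the
 * ibv_mr handles created here.
 */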
static int
spdk_nvmf_rdma_mem_notify(void *cb_ctx, struct spdk_mem_map *map,
			  enum spdk_mem_map_notify_action action,
			  void *vaddr, size_t size)
{
	struct spdk_nvmf_rdma_device *device = cb_ctx;
	struct ibv_pd *pd = device->pd;
	struct ibv_mr *mr;

	switch (action) {
	case SPDK_MEM_MAP_NOTIFY_REGISTER:
		mr = ibv_reg_mr(pd, vaddr, size,
				IBV_ACCESS_LOCAL_WRITE |
				IBV_ACCESS_REMOTE_READ |
				IBV_ACCESS_REMOTE_WRITE);
		if (mr == NULL) {
			SPDK_ERRLOG("ibv_reg_mr() failed\n");
			return -1;
		} else {
			spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr);
		}
		break;
	case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
		mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr);
		spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
		if (mr) {
			ibv_dereg_mr(mr);
		}
		break;
	}

	return 0;
}

static spdk_nvmf_request_prep_type
spdk_nvmf_request_prep_data(struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd;
	struct spdk_nvme_cpl *rsp;
	struct spdk_nvmf_rdma_request *rdma_req;
	struct spdk_nvmf_rdma_poll_group *rgroup;
	struct spdk_nvme_sgl_descriptor *sgl;
	struct spdk_nvmf_rdma_transport *rtransport;
	struct spdk_nvmf_rdma_device *device;

	cmd = &req->cmd->nvme_cmd;
	rsp = &req->rsp->nvme_cpl;
@@ -758,6 +792,7 @@ spdk_nvmf_request_prep_data(struct spdk_nvmf_request *req)
	}

	rtransport = SPDK_CONTAINEROF(req->qpair->transport, struct spdk_nvmf_rdma_transport, transport);
	device = SPDK_CONTAINEROF(req->qpair, struct spdk_nvmf_rdma_qpair, qpair)->port->device;

	sgl = &cmd->dptr.sgl1;

	if (sgl->generic.type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK &&
@@ -776,41 +811,22 @@ spdk_nvmf_request_prep_data(struct spdk_nvmf_request *req)
	}

	req->length = sgl->keyed.length;

	req->data = spdk_mempool_get(rtransport->data_buf_pool);
	if (!req->data) {
		/* No available buffers. Queue this request up. */
		SPDK_TRACELOG(SPDK_TRACE_RDMA, "No available large data buffers. Queueing request %p\n", req);
		return SPDK_NVMF_REQUEST_PREP_PENDING_BUFFER;
	}

	rdma_req->data_from_pool = true;
	rdma_req->data.sgl[0].addr = (uintptr_t)req->data;
	rdma_req->data.sgl[0].length = sgl->keyed.length;
	rdma_req->data.sgl[0].lkey = ((struct ibv_mr *)spdk_mem_map_translate(device->map,
				     (uint64_t)req->data))->lkey;
	rdma_req->data.wr.wr.rdma.rkey = sgl->keyed.key;
	rdma_req->data.wr.wr.rdma.remote_addr = sgl->address;

	if (!req->qpair->ctrlr) {
		/* The only time a connection won't have a ctrlr
		 * is when this is the CONNECT request.
		 */
		assert(cmd->opc == SPDK_NVME_OPC_FABRIC);
		assert(req->xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER);
		assert(req->length <= rtransport->in_capsule_data_size);

		/* Use the in capsule data buffer, even though this isn't in capsule data. */
		SPDK_TRACELOG(SPDK_TRACE_RDMA, "Request using in capsule buffer for non-capsule data\n");

		req->data = rdma_req->recv->buf;
		rdma_req->data.sgl[0].lkey = SPDK_CONTAINEROF(req->qpair, struct spdk_nvmf_rdma_qpair,
					     qpair)->bufs_mr->lkey;
		rdma_req->data_from_pool = false;
	} else {
		rgroup = SPDK_CONTAINEROF(req->qpair->ctrlr->group, struct spdk_nvmf_rdma_poll_group, group);

		req->data = spdk_mempool_get(rtransport->data_buf_pool);
		if (!req->data) {
			/* No available buffers. Queue this request up. */
			SPDK_TRACELOG(SPDK_TRACE_RDMA, "No available large data buffers. Queueing request %p\n", req);
			return SPDK_NVMF_REQUEST_PREP_PENDING_BUFFER;
		}

		rdma_req->data.sgl[0].lkey = ((struct ibv_mr *)spdk_mem_map_translate(rgroup->device->map,
					     (uint64_t)req->data))->lkey;
		rdma_req->data_from_pool = true;
		SPDK_TRACELOG(SPDK_TRACE_RDMA, "Request %p took buffer from central pool\n", req);
	}

	rdma_req->data.sgl[0].addr = (uintptr_t)req->data;
	SPDK_TRACELOG(SPDK_TRACE_RDMA, "Request %p took buffer from central pool\n", req);

	if (req->xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
		return SPDK_NVMF_REQUEST_PREP_PENDING_DATA;
@@ -1122,6 +1138,17 @@ spdk_nvmf_rdma_listen(struct spdk_nvmf_transport *transport,
		return -EINVAL;
	}

	if (!device->map) {
		device->pd = port->id->pd;
		device->map = spdk_mem_map_alloc(0, spdk_nvmf_rdma_mem_notify, device);
		if (!device->map) {
			SPDK_ERRLOG("Unable to allocate memory map for new poll group\n");
			return -1;
		}
	} else {
		assert(device->pd == port->id->pd);
	}

	SPDK_NOTICELOG("*** NVMf Target Listening on %s port %d ***\n",
		       port->trid.traddr, ntohs(rdma_get_src_port(port->id)));
@@ -1286,40 +1313,6 @@ spdk_nvmf_rdma_poll_group_destroy(struct spdk_nvmf_poll_group *group)
	free(rgroup);
}

static int
spdk_nvmf_rdma_mem_notify(void *cb_ctx, struct spdk_mem_map *map,
			  enum spdk_mem_map_notify_action action,
			  void *vaddr, size_t size)
{
	struct spdk_nvmf_rdma_device *device = cb_ctx;
	struct ibv_pd *pd = device->pd;
	struct ibv_mr *mr;

	switch (action) {
	case SPDK_MEM_MAP_NOTIFY_REGISTER:
		mr = ibv_reg_mr(pd, vaddr, size,
				IBV_ACCESS_LOCAL_WRITE |
				IBV_ACCESS_REMOTE_READ |
				IBV_ACCESS_REMOTE_WRITE);
		if (mr == NULL) {
			SPDK_ERRLOG("ibv_reg_mr() failed\n");
			return -1;
		} else {
			spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr);
		}
		break;
	case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
		mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr);
		spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
		if (mr) {
			ibv_dereg_mr(mr);
		}
		break;
	}

	return 0;
}

static int
spdk_nvmf_rdma_poll_group_add(struct spdk_nvmf_poll_group *group,
			      struct spdk_nvmf_qpair *qpair)
@@ -1357,15 +1350,6 @@ spdk_nvmf_rdma_poll_group_add(struct spdk_nvmf_poll_group *group,
		return -EINVAL;
	}

	if (!device->map) {
		device->pd = rdma_qpair->cm_id->pd;
		device->map = spdk_mem_map_alloc(0, spdk_nvmf_rdma_mem_notify, device);
		if (!device->map) {
			SPDK_ERRLOG("Unable to allocate memory map for new poll group\n");
			return -1;
		}
	}

	rgroup->device = device;

	return 0;