2016-06-07 21:32:27 +00:00
|
|
|
/*-
|
|
|
|
* BSD LICENSE
|
|
|
|
*
|
|
|
|
* Copyright (c) Intel Corporation.
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
*
|
|
|
|
* * Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in
|
|
|
|
* the documentation and/or other materials provided with the
|
|
|
|
* distribution.
|
|
|
|
* * Neither the name of Intel Corporation nor the names of its
|
|
|
|
* contributors may be used to endorse or promote products derived
|
|
|
|
* from this software without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <rte_config.h>
#include <rte_mempool.h>
#include <rte_malloc.h>

#include "spdk/nvme.h"
#include "spdk/pci.h"
|
|
|
|
|
|
|
|
/*
 * Node of the singly linked list of attached controllers (g_controllers).
 * One node is created per controller in attach_cb() and released in cleanup().
 */
struct ctrlr_entry {
	/* Controller handle passed to attach_cb(). */
	struct spdk_nvme_ctrlr	*ctrlr;
	/* Next node in the list, or NULL at the tail. */
	struct ctrlr_entry	*next;
	/* Printable label built in attach_cb() from the identify data (mn and sn fields). */
	char			name[1024];
};
|
|
|
|
|
|
|
|
/*
 * Node of the singly linked list of active namespaces (g_namespaces).
 * Nodes are created in register_ns() and released in cleanup().
 */
struct ns_entry {
	/* Controller that owns this namespace. */
	struct spdk_nvme_ctrlr	*ctrlr;
	/* Namespace handle used when submitting I/O. */
	struct spdk_nvme_ns	*ns;
	/* Next node in the list, or NULL at the tail. */
	struct ns_entry		*next;
	/* I/O queue pair; allocated and freed inside hello_world(). */
	struct spdk_nvme_qpair	*qpair;
};
|
|
|
|
|
|
|
|
/*
 * Request pool created in main(). It has external linkage on purpose: the
 * SPDK NVMe driver picks it up through an extern declaration.
 */
struct rte_mempool *request_mempool;

/* Heads of the controller and namespace lists; static storage starts out NULL. */
static struct ctrlr_entry *g_controllers;
static struct ns_entry *g_namespaces;
|
|
|
|
|
|
|
|
static void
|
|
|
|
register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns)
|
|
|
|
{
|
|
|
|
struct ns_entry *entry;
|
|
|
|
const struct spdk_nvme_ctrlr_data *cdata;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* spdk_nvme_ctrlr is the logical abstraction in SPDK for an NVMe
|
|
|
|
* controller. During initialization, the IDENTIFY data for the
|
|
|
|
* controller is read using an NVMe admin command, and that data
|
|
|
|
* can be retrieved using spdk_nvme_ctrlr_get_data() to get
|
|
|
|
* detailed information on the controller. Refer to the NVMe
|
|
|
|
* specification for more details on IDENTIFY for NVMe controllers.
|
|
|
|
*/
|
|
|
|
cdata = spdk_nvme_ctrlr_get_data(ctrlr);
|
|
|
|
|
|
|
|
if (!spdk_nvme_ns_is_active(ns)) {
|
|
|
|
printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n",
|
|
|
|
cdata->mn, cdata->sn,
|
|
|
|
spdk_nvme_ns_get_id(ns));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
entry = malloc(sizeof(struct ns_entry));
|
|
|
|
if (entry == NULL) {
|
|
|
|
perror("ns_entry malloc");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
entry->ctrlr = ctrlr;
|
|
|
|
entry->ns = ns;
|
|
|
|
entry->next = g_namespaces;
|
|
|
|
g_namespaces = entry;
|
|
|
|
|
|
|
|
printf(" Namespace ID: %d size: %juGB\n", spdk_nvme_ns_get_id(ns),
|
|
|
|
spdk_nvme_ns_get_size(ns) / 1000000000);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * State shared between hello_world() and its completion callbacks for one
 * write-then-read round trip on a namespace.
 */
struct hello_world_sequence {
	/* Namespace (and its qpair) the I/O is issued against. */
	struct ns_entry	*ns_entry;
	/* Current data buffer: first the write payload, later the read target. */
	char		*buf;
	/* Set to 1 by read_complete(); hello_world() polls until it is non-zero. */
	int		is_completed;
};
|
|
|
|
|
|
|
|
static void
|
|
|
|
read_complete(void *arg, const struct spdk_nvme_cpl *completion)
|
|
|
|
{
|
|
|
|
struct hello_world_sequence *sequence = arg;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The read I/O has completed. Print the contents of the
|
|
|
|
* buffer, free the buffer, then mark the sequence as
|
|
|
|
* completed. This will trigger the hello_world() function
|
|
|
|
* to exit its polling loop.
|
|
|
|
*/
|
|
|
|
printf("%s", sequence->buf);
|
|
|
|
rte_free(sequence->buf);
|
|
|
|
sequence->is_completed = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
write_complete(void *arg, const struct spdk_nvme_cpl *completion)
|
|
|
|
{
|
|
|
|
struct hello_world_sequence *sequence = arg;
|
|
|
|
struct ns_entry *ns_entry = sequence->ns_entry;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The write I/O has completed. Free the buffer associated with
|
|
|
|
* the write I/O and allocate a new zeroed buffer for reading
|
|
|
|
* the data back from the NVMe namespace.
|
|
|
|
*/
|
|
|
|
rte_free(sequence->buf);
|
|
|
|
sequence->buf = rte_zmalloc(NULL, 0x1000, 0x1000);
|
|
|
|
|
|
|
|
rc = spdk_nvme_ns_cmd_read(ns_entry->ns, ns_entry->qpair, sequence->buf,
|
|
|
|
0, /* LBA start */
|
|
|
|
1, /* number of LBAs */
|
|
|
|
read_complete, (void *)sequence, 0);
|
|
|
|
if (rc != 0) {
|
|
|
|
fprintf(stderr, "starting read I/O failed\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
hello_world(void)
|
|
|
|
{
|
|
|
|
struct ns_entry *ns_entry;
|
|
|
|
struct hello_world_sequence sequence;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
ns_entry = g_namespaces;
|
|
|
|
while (ns_entry != NULL) {
|
|
|
|
/*
|
|
|
|
* Allocate an I/O qpair that we can use to submit read/write requests
|
|
|
|
* to namespaces on the controller. NVMe controllers typically support
|
|
|
|
* many qpairs per controller. Any I/O qpair allocated for a controller
|
|
|
|
* can submit I/O to any namespace on that controller.
|
|
|
|
*
|
|
|
|
* The SPDK NVMe driver provides no synchronization for qpair accesses -
|
|
|
|
* the application must ensure only a single thread submits I/O to a
|
|
|
|
* qpair, and that same thread must also check for completions on that
|
|
|
|
* qpair. This enables extremely efficient I/O processing by making all
|
|
|
|
* I/O operations completely lockless.
|
|
|
|
*/
|
|
|
|
ns_entry->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ns_entry->ctrlr, 0);
|
|
|
|
if (ns_entry->qpair == NULL) {
|
|
|
|
printf("ERROR: init_ns_worker_ctx() failed\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Use DPDK rte_zmalloc to allocate a 4KB zeroed buffer. This memory
|
|
|
|
* will be allocated from 2MB hugepages and will be pinned. These are
|
|
|
|
* both requirements for data buffers used for SPDK NVMe I/O operations.
|
|
|
|
*/
|
|
|
|
sequence.buf = rte_zmalloc(NULL, 0x1000, 0x1000);
|
|
|
|
sequence.is_completed = 0;
|
|
|
|
sequence.ns_entry = ns_entry;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Print "Hello world!" to sequence.buf. We will write this data to LBA
|
|
|
|
* 0 on the namespace, and then later read it back into a separate buffer
|
|
|
|
* to demonstrate the full I/O path.
|
|
|
|
*/
|
|
|
|
sprintf(sequence.buf, "Hello world!\n");
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Write the data buffer to LBA 0 of this namespace. "write_complete" and
|
|
|
|
* "&sequence" are specified as the completion callback function and
|
|
|
|
* argument respectively. write_complete() will be called with the
|
|
|
|
* value of &sequence as a parameter when the write I/O is completed.
|
|
|
|
* This allows users to potentially specify different completion
|
|
|
|
* callback routines for each I/O, as well as pass a unique handle
|
|
|
|
* as an argument so the application knows which I/O has completed.
|
|
|
|
*
|
|
|
|
* Note that the SPDK NVMe driver will only check for completions
|
|
|
|
* when the application calls spdk_nvme_qpair_process_completions().
|
|
|
|
* It is the responsibility of the application to trigger the polling
|
|
|
|
* process.
|
|
|
|
*/
|
|
|
|
rc = spdk_nvme_ns_cmd_write(ns_entry->ns, ns_entry->qpair, sequence.buf,
|
|
|
|
0, /* LBA start */
|
|
|
|
1, /* number of LBAs */
|
|
|
|
write_complete, &sequence, 0);
|
|
|
|
if (rc != 0) {
|
|
|
|
fprintf(stderr, "starting write I/O failed\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Poll for completions. 0 here means process all available completions.
|
|
|
|
* In certain usage models, the caller may specify a positive integer
|
|
|
|
* instead of 0 to signify the maximum number of completions it should
|
|
|
|
* process. This function will never block - if there are no
|
|
|
|
* completions pending on the specified qpair, it will return immediately.
|
|
|
|
*
|
|
|
|
* When the write I/O completes, write_complete() will submit a new I/O
|
|
|
|
* to read LBA 0 into a separate buffer, specifying read_complete() as its
|
|
|
|
* completion routine. When the read I/O completes, read_complete() will
|
|
|
|
* print the buffer contents and set sequence.is_completed = 1. That will
|
|
|
|
* break this loop and then exit the program.
|
|
|
|
*/
|
|
|
|
while (!sequence.is_completed) {
|
|
|
|
spdk_nvme_qpair_process_completions(ns_entry->qpair, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Free the I/O qpair. This typically is done when an application exits.
|
|
|
|
* But SPDK does support freeing and then reallocating qpairs during
|
|
|
|
* operation. It is the responsibility of the caller to ensure all
|
|
|
|
* pending I/O are completed before trying to free the qpair.
|
|
|
|
*/
|
|
|
|
spdk_nvme_ctrlr_free_io_qpair(ns_entry->qpair);
|
|
|
|
ns_entry = ns_entry->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Probe callback passed to spdk_nvme_probe() in main().
 *
 * cb_ctx - caller context (unused)
 * dev    - PCI device of the controller that was found
 * opts   - controller options (unused)
 *
 * Returns true to attach to the controller. Returns false for a controller
 * that is bound to a non-uio driver (i.e. the kernel NVMe driver), after
 * logging its PCI address to stderr.
 */
static bool
probe_cb(void *cb_ctx, struct spdk_pci_device *dev, struct spdk_nvme_ctrlr_opts *opts)
{
	if (!spdk_pci_device_has_non_uio_driver(dev)) {
		printf("Attaching to %04x:%02x:%02x.%02x\n",
		       spdk_pci_device_get_domain(dev),
		       spdk_pci_device_get_bus(dev),
		       spdk_pci_device_get_dev(dev),
		       spdk_pci_device_get_func(dev));
		return true;
	}

	/* Some other driver owns this device, so leave it alone. */
	fprintf(stderr, "non-uio kernel driver attached to NVMe\n");
	fprintf(stderr, " controller at PCI address %04x:%02x:%02x.%02x\n",
		spdk_pci_device_get_domain(dev),
		spdk_pci_device_get_bus(dev),
		spdk_pci_device_get_dev(dev),
		spdk_pci_device_get_func(dev));
	fprintf(stderr, " skipping...\n");

	return false;
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
attach_cb(void *cb_ctx, struct spdk_pci_device *dev, struct spdk_nvme_ctrlr *ctrlr,
|
|
|
|
const struct spdk_nvme_ctrlr_opts *opts)
|
|
|
|
{
|
|
|
|
int nsid, num_ns;
|
|
|
|
struct ctrlr_entry *entry;
|
|
|
|
const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr);
|
|
|
|
|
|
|
|
entry = malloc(sizeof(struct ctrlr_entry));
|
|
|
|
if (entry == NULL) {
|
|
|
|
perror("ctrlr_entry malloc");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
printf("Attached to %04x:%02x:%02x.%02x\n",
|
|
|
|
spdk_pci_device_get_domain(dev),
|
|
|
|
spdk_pci_device_get_bus(dev),
|
|
|
|
spdk_pci_device_get_dev(dev),
|
|
|
|
spdk_pci_device_get_func(dev));
|
|
|
|
|
|
|
|
snprintf(entry->name, sizeof(entry->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);
|
|
|
|
|
|
|
|
entry->ctrlr = ctrlr;
|
|
|
|
entry->next = g_controllers;
|
|
|
|
g_controllers = entry;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Each controller has one of more namespaces. An NVMe namespace is basically
|
|
|
|
* equivalent to a SCSI LUN. The controller's IDENTIFY data tells us how
|
|
|
|
* many namespaces exist on the controller. For Intel(R) P3X00 controllers,
|
|
|
|
* it will just be one namespace.
|
|
|
|
*
|
|
|
|
* Note that in NVMe, namespace IDs start at 1, not 0.
|
|
|
|
*/
|
|
|
|
num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr);
|
|
|
|
printf("Using controller %s with %d namespaces.\n", entry->name, num_ns);
|
|
|
|
for (nsid = 1; nsid <= num_ns; nsid++) {
|
|
|
|
register_ns(ctrlr, spdk_nvme_ctrlr_get_ns(ctrlr, nsid));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
cleanup(void)
|
|
|
|
{
|
|
|
|
struct ns_entry *ns_entry = g_namespaces;
|
|
|
|
struct ctrlr_entry *ctrlr_entry = g_controllers;
|
|
|
|
|
|
|
|
while (ns_entry) {
|
|
|
|
struct ns_entry *next = ns_entry->next;
|
|
|
|
free(ns_entry);
|
|
|
|
ns_entry = next;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (ctrlr_entry) {
|
|
|
|
struct ctrlr_entry *next = ctrlr_entry->next;
|
|
|
|
|
|
|
|
spdk_nvme_detach(ctrlr_entry->ctrlr);
|
|
|
|
free(ctrlr_entry);
|
|
|
|
ctrlr_entry = next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Argument vector handed to rte_eal_init() in main(): a program name
 * followed by "-c 0x1", which tells DPDK to only use core 0.
 */
static char *ealargs[] = { "hello_world", "-c 0x1" };
|
|
|
|
|
|
|
|
int main(int argc, char **argv)
|
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* By default, the SPDK NVMe driver uses DPDK for huge page-based
|
|
|
|
* memory management and NVMe request buffer pools. Huge pages can
|
|
|
|
* be either 2MB or 1GB in size (instead of 4KB) and are pinned in
|
|
|
|
* memory. Pinned memory is important to ensure DMA operations
|
|
|
|
* never target swapped out memory.
|
|
|
|
*
|
|
|
|
* So first we must initialize DPDK. "-c 0x1" indicates to only use
|
|
|
|
* core 0.
|
|
|
|
*/
|
|
|
|
rc = rte_eal_init(sizeof(ealargs) / sizeof(ealargs[0]), ealargs);
|
|
|
|
if (rc < 0) {
|
|
|
|
fprintf(stderr, "could not initialize dpdk\n");
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Create the NVMe request buffer pool. This will be used internally
|
|
|
|
* by the SPDK NVMe driver to allocate an spdk_nvme_request data
|
|
|
|
* structure for each I/O request. This is implicitly passed to
|
|
|
|
* the SPDK NVMe driver via an extern declaration in nvme_impl.h.
|
|
|
|
*/
|
|
|
|
request_mempool = rte_mempool_create("nvme_request", 8192,
|
|
|
|
spdk_nvme_request_size(), 128, 0,
|
|
|
|
NULL, NULL, NULL, NULL,
|
|
|
|
SOCKET_ID_ANY, 0);
|
|
|
|
|
|
|
|
if (request_mempool == NULL) {
|
|
|
|
fprintf(stderr, "could not initialize request mempool\n");
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
printf("Initializing NVMe Controllers\n");
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Start the SPDK NVMe enumeration process. probe_cb will be called
|
|
|
|
* for each NVMe controller found, giving our application a choice on
|
|
|
|
* whether to attach to each controller. attach_cb will then be
|
|
|
|
* called for each controller after the SPDK NVMe driver has completed
|
|
|
|
* initializing the controller we chose to attach.
|
|
|
|
*/
|
2016-06-21 23:49:26 +00:00
|
|
|
rc = spdk_nvme_probe(NULL, probe_cb, attach_cb, NULL);
|
2016-06-07 21:32:27 +00:00
|
|
|
if (rc != 0) {
|
|
|
|
fprintf(stderr, "spdk_nvme_probe() failed\n");
|
|
|
|
cleanup();
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
printf("Initialization complete.\n");
|
|
|
|
hello_world();
|
|
|
|
cleanup();
|
|
|
|
return 0;
|
|
|
|
}
|