conf: No longer allow wildcard claiming of NVMe devices

All devices must now be listed explicitly by PCI address (BDF) via a
TransportID entry; wildcard claiming with ClaimAllDevices is no longer
supported. Add a scripts/gen_nvme.sh helper that uses lspci to grab the
BDFs of the NVMe devices available on the current machine and generate
the corresponding [Nvme] configuration section.

Change-Id: I4a53b335e3d516629f050ae1b2ab7aff8dd7f568
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Ben Walker 2017-01-25 16:36:40 -07:00
parent 8fefa7e9ee
commit 0829424e19
16 changed files with 156 additions and 128 deletions
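For reference, the Linux branch of the new helper collects the NVMe BDFs with the lspci pipeline shown below (copied from scripts/gen_nvme.sh in this commit; class code 0108 is the PCI mass-storage / non-volatile-memory class used by NVMe controllers):

~~~
# Print each NVMe controller as a full BDF, e.g. 0000:01:00.0
lspci -mm -n | grep 0108 | tr -d '"' | awk -F " " '{print "0000:"$1}'
~~~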


@@ -17,6 +17,22 @@ The `identify` and `perf` NVMe examples were modified to add a consistent format
specifying remote NVMe over Fabrics devices via the `-r` option.
This is implemented using the new `spdk_nvme_transport_id_parse()` function.
### iSCSI Target
The [Nvme] section of the configuration file was modified to remove the `BDF` directive
and replace it with a `TransportID` directive. Both local (PCIe) and remote (NVMe-oF)
devices can now be specified as the backing block device. A script to generate an
entire [Nvme] section based on the local NVMe devices attached was added at
`scripts/gen_nvme.sh`.
### NVMe-oF Target
The [Nvme] section of the configuration file was modified to remove the `BDF` directive
and replace it with a `TransportID` directive. Both local (PCIe) and remote (NVMe-oF)
devices can now be specified as the backing block device. A script to generate an
entire [Nvme] section based on the local NVMe devices attached was added at
`scripts/gen_nvme.sh`.
## v16.12: NVMe over Fabrics host, hotplug, and multi-process
### NVMe library
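As a quick sketch of the workflow the changelog entries above describe, the generated [Nvme] section can simply be appended to an existing configuration file, just as the updated test scripts in this commit do (the target path below is illustrative):

~~~
# Regenerate the [Nvme] section from the locally attached NVMe devices
# and append it to an iSCSI target configuration file.
./scripts/gen_nvme.sh >> /usr/local/etc/spdk/iscsi.conf
~~~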


@@ -100,23 +100,16 @@ the kernel to avoid interrupts and context switching.
~~~
[Nvme]
# NVMe Device Whitelist
# Users may specify which NVMe devices to claim by their PCI
# domain, bus, device, and function. The format is dddd:bb:dd.f, which is
# the same format displayed by lspci or in /sys/bus/pci/devices. The second
# argument is a "name" for the device that can be anything. The name
# is referenced later in the Subsystem section.
#
# Alternatively, the user can specify ClaimAllDevices. All
# NVMe devices will be claimed.
BDF 0000:00:00.0
BDF 0000:01:00.0
# Users may specify which NVMe devices to claim by their transport id.
# See spdk_nvme_transport_id_parse() in spdk/nvme.h for the correct format.
# The devices will be assigned names in the format NvmeXnY, where X starts at 0 and
# increases by 1 for each entry and Y is the namespace id, which starts at 1.
TransportID "trtype:PCIe traddr:0000:00:00.0"
TransportID "trtype:PCIe traddr:0000:01:00.0"
# The number of attempts per I/O when an I/O fails. Do not include
# this key to get the default behavior.
NvmeRetryCount 4
# The maximum number of NVMe controllers to claim. Do not include this key to
# claim all of them.
NumControllers 2
[TargetNodeX]
# other TargetNode parameters go here (TargetName, Mapping, etc.)


@@ -87,24 +87,17 @@
# NVMe configuration options
[Nvme]
# NVMe Device Whitelist
# Users may specify which NVMe devices to claim by their PCI
# domain, bus, device, and function. The format is dddd:bb:dd.f, which is
# the same format displayed by lspci or in /sys/bus/pci/devices. The second
# argument is a "name" for the device that can be anything. The name
# is referenced later in the Subsystem section.
#
# Alternatively, the user can specify ClaimAllDevices. All
# NVMe devices will be claimed and named Nvme0, Nvme1, etc.
BDF 0000:00:00.0 Nvme0
BDF 0000:01:00.0 Nvme1
# NVMe Device Whitelist
# Users may specify which NVMe devices to claim by their transport id.
# See spdk_nvme_transport_id_parse() in spdk/nvme.h for the correct format.
# The devices will be assigned names in the format NvmeXnY, where X starts at 0 and
# increases by 1 for each entry and Y is the namespace id, which starts at 1.
TransportID "trtype:PCIe traddr:0000:00:00.0"
TransportID "trtype:PCIe traddr:0000:01:00.0"
# The number of attempts per I/O when an I/O fails. Do not include
# this key to get the default behavior.
NvmeRetryCount 4
# The maximum number of NVMe controllers to claim. Do not include this key to
# claim all of them.
NumControllers 2
# Registers the application to receive timeout callback and to reset the controller.
ResetControllerOnTimeout Yes
# Timeout value.


@@ -67,17 +67,12 @@
# NVMe configuration options
[Nvme]
# NVMe Device Whitelist
# Users may specify which NVMe devices to claim by their PCI
# domain, bus, device, and function. The format is dddd:bb:dd.f, which is
# the same format displayed by lspci or in /sys/bus/pci/devices. The second
# argument is a "name" for the device that can be anything. The name
# is referenced later in the Subsystem section.
#
# Alternatively, the user can specify ClaimAllDevices. All
# NVMe devices will be claimed and named Nvme0, Nvme1, etc.
#BDF 0000:81:00.0 Nvme0
#BDF 0000:01:00.0 Nvme1
ClaimAllDevices
# Users may specify which NVMe devices to claim by their transport id.
# See spdk_nvme_transport_id_parse() in spdk/nvme.h for the correct format.
# The devices will be assigned names in the format NvmeXnY, where X starts at 0 and
# increases by 1 for each entry and Y is the namespace id, which starts at 1.
TransportID "trtype:PCIe traddr:0000:00:00.0"
TransportID "trtype:PCIe traddr:0000:01:00.0"
# The number of attempts per I/O when an I/O fails. Do not include
# this key to get the default behavior.


@@ -62,7 +62,7 @@ struct nvme_ctrlr {
* target for CONTROLLER IDENTIFY command during initialization
*/
struct spdk_nvme_ctrlr *ctrlr;
struct spdk_pci_addr pci_addr;
struct spdk_nvme_transport_id trid;
struct spdk_poller *adminq_timer_poller;
@@ -106,9 +106,8 @@ enum data_direction {
};
struct nvme_probe_ctx {
int controllers_remaining;
int num_whitelist_controllers;
struct spdk_pci_addr whitelist[NVME_MAX_CONTROLLERS];
size_t count;
struct spdk_nvme_transport_id trids[NVME_MAX_CONTROLLERS];
};
static int nvme_controller_index = 0;
@@ -399,10 +398,39 @@ bdev_nvme_dump_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w
spdk_json_write_name(w, "nvme");
spdk_json_write_object_begin(w);
spdk_json_write_name(w, "pci_address");
spdk_json_write_string_fmt(w, "%04x:%02x:%02x.%x", nvme_ctrlr->pci_addr.domain,
nvme_ctrlr->pci_addr.bus, nvme_ctrlr->pci_addr.dev,
nvme_ctrlr->pci_addr.func);
if (nvme_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
spdk_json_write_name(w, "pci_address");
spdk_json_write_string(w, nvme_ctrlr->trid.traddr);
}
spdk_json_write_name(w, "trid");
spdk_json_write_object_begin(w);
spdk_json_write_name(w, "trtype");
if (nvme_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
spdk_json_write_string(w, "PCIe");
} else if (nvme_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA) {
spdk_json_write_string(w, "RDMA");
} else {
spdk_json_write_string(w, "Unknown");
}
if (nvme_ctrlr->trid.traddr) {
spdk_json_write_name(w, "traddr");
spdk_json_write_string(w, nvme_ctrlr->trid.traddr);
}
if (nvme_ctrlr->trid.trsvcid) {
spdk_json_write_name(w, "trsvcid");
spdk_json_write_string(w, nvme_ctrlr->trid.trsvcid);
}
if (nvme_ctrlr->trid.subnqn) {
spdk_json_write_name(w, "subnqn");
spdk_json_write_string(w, nvme_ctrlr->trid.subnqn);
}
spdk_json_write_object_end(w);
spdk_json_write_name(w, "ctrlr_data");
spdk_json_write_object_begin(w);
@@ -502,29 +530,15 @@ probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
struct spdk_nvme_ctrlr_opts *opts)
{
struct nvme_probe_ctx *ctx = cb_ctx;
int i;
size_t i;
bool claim_device = false;
struct spdk_pci_addr pci_addr;
if (spdk_pci_addr_parse(&pci_addr, trid->traddr)) {
return false;
}
SPDK_NOTICELOG("Probing device %s\n", trid->traddr);
SPDK_NOTICELOG("Probing device %s\n",
trid->traddr);
if (ctx->controllers_remaining == 0) {
return false;
}
if (ctx->num_whitelist_controllers == 0) {
claim_device = true;
} else {
for (i = 0; i < NVME_MAX_CONTROLLERS; i++) {
if (spdk_pci_addr_compare(&pci_addr, &ctx->whitelist[i]) == 0) {
claim_device = true;
break;
}
for (i = 0; i < ctx->count; i++) {
if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) {
claim_device = true;
break;
}
}
@@ -532,12 +546,17 @@ probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
return false;
}
/* Claim the device in case conflict with other process */
if (spdk_pci_device_claim(&pci_addr) != 0) {
return false;
}
if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
struct spdk_pci_addr pci_addr;
ctx->controllers_remaining--;
if (spdk_pci_addr_parse(&pci_addr, trid->traddr)) {
return false;
}
if (spdk_pci_device_claim(&pci_addr) != 0) {
return false;
}
}
return true;
}
@@ -571,7 +590,7 @@ attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
dev->adminq_timer_poller = NULL;
dev->ctrlr = ctrlr;
dev->ref = 0;
spdk_pci_addr_parse(&dev->pci_addr, trid->traddr);
dev->trid = *trid;
dev->id = nvme_controller_index++;
nvme_ctrlr_create_bdevs(dev, dev->id);
@@ -590,12 +609,12 @@ attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
}
static struct nvme_ctrlr *
nvme_ctrlr_get(struct spdk_pci_addr *addr)
nvme_ctrlr_get(const struct spdk_nvme_transport_id *trid)
{
struct nvme_ctrlr *nvme_ctrlr;
struct nvme_ctrlr *nvme_ctrlr;
TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) {
if (spdk_pci_addr_compare(&nvme_ctrlr->pci_addr, addr) == 0) {
if (spdk_nvme_transport_id_compare(trid, &nvme_ctrlr->trid) == 0) {
return nvme_ctrlr;
}
}
@@ -648,21 +667,18 @@ spdk_bdev_nvme_create(struct spdk_nvme_transport_id *trid,
struct nvme_bdev *nvme_bdev;
size_t j;
if (spdk_pci_addr_parse(&probe_ctx.whitelist[0], trid->traddr) < 0) {
return -1;
}
probe_ctx.num_whitelist_controllers = 1;
probe_ctx.controllers_remaining = 1;
if (nvme_ctrlr_get(&probe_ctx.whitelist[0]) != NULL) {
if (nvme_ctrlr_get(trid) != NULL) {
SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", trid->traddr);
return -1;
}
probe_ctx.count = 1;
probe_ctx.trids[0] = *trid;
if (spdk_nvme_probe(trid, &probe_ctx, probe_cb, attach_cb, NULL)) {
return -1;
}
nvme_ctrlr = nvme_ctrlr_get(&probe_ctx.whitelist[0]);
nvme_ctrlr = nvme_ctrlr_get(trid);
if (!nvme_ctrlr) {
return -1;
}
@@ -693,49 +709,32 @@ bdev_nvme_library_init(void)
{
struct spdk_conf_section *sp;
const char *val;
int i;
struct nvme_probe_ctx probe_ctx;
int i, rc;
struct nvme_probe_ctx probe_ctx = {};
sp = spdk_conf_find_section(NULL, "Nvme");
if (sp == NULL) {
/*
* If configuration file did not specify the Nvme section, do
* not take the time to initialize the NVMe devices.
*/
return 0;
}
spdk_nvme_retry_count = spdk_conf_section_get_intval(sp, "NvmeRetryCount");
if (spdk_nvme_retry_count < 0)
if (spdk_nvme_retry_count < 0) {
spdk_nvme_retry_count = SPDK_NVME_DEFAULT_RETRY_COUNT;
/*
* If NumControllers is not found, this will return -1, which we
* will later use to denote that we should initialize all
* controllers.
*/
num_controllers = spdk_conf_section_get_intval(sp, "NumControllers");
/* Init the whitelist */
probe_ctx.num_whitelist_controllers = 0;
if (num_controllers > 0) {
for (i = 0; ; i++) {
val = spdk_conf_section_get_nmval(sp, "BDF", i, 0);
if (val == NULL) {
break;
}
if (spdk_pci_addr_parse(&probe_ctx.whitelist[probe_ctx.num_whitelist_controllers], val) < 0) {
SPDK_ERRLOG("Invalid format for BDF: %s\n", val);
return -1;
}
probe_ctx.num_whitelist_controllers++;
}
}
probe_ctx.controllers_remaining = num_controllers;
for (i = 0; i < NVME_MAX_CONTROLLERS; i++) {
val = spdk_conf_section_get_nmval(sp, "TransportID", i, 0);
if (val == NULL) {
break;
}
rc = spdk_nvme_transport_id_parse(&probe_ctx.trids[i], val);
if (rc < 0) {
SPDK_ERRLOG("Unable to parse TransportID: %s\n", val);
return -1;
}
probe_ctx.count++;
}
val = spdk_conf_section_get_val(sp, "ResetControllerOnTimeout");
if (val != NULL) {

scripts/gen_nvme.sh Executable file

@@ -0,0 +1,22 @@
#!/usr/bin/env bash
set -e
case `uname` in
FreeBSD)
bdfs=$(pciconf -l | grep "class=0x010802" | awk -F: ' {printf "0000:%02X:%02X.%X\n", $2, $3, $4}')
;;
Linux)
bdfs=$(lspci -mm -n | grep 0108 | tr -d '"' | awk -F " " '{print "0000:"$1}')
;;
*)
exit 1
;;
esac
echo "[Nvme]"
i=0
for bdf in $bdfs; do
echo " TransportID \"trtype:PCIe traddr:$bdf\""
let i=i+1
done
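A hypothetical run of the new script on a machine with two local NVMe drives would produce output along these lines (the addresses are illustrative and match the examples used in the documentation above):

~~~
$ scripts/gen_nvme.sh
[Nvme]
  TransportID "trtype:PCIe traddr:0000:00:00.0"
  TransportID "trtype:PCIe traddr:0000:01:00.0"
~~~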


@@ -24,6 +24,9 @@ fi
timing_enter ext4test
cp $testdir/iscsi.conf.in $testdir/iscsi.conf
$rootdir/scripts/gen_nvme.sh >> $testdir/iscsi.conf
# iSCSI target configuration
PORT=3260
RPC_PORT=5260
@@ -100,6 +103,7 @@ done
trap - SIGINT SIGTERM EXIT
rm -f $testdir/iscsi.conf
iscsicleanup
killprocess $pid
timing_exit ext4test


@@ -43,8 +43,6 @@
# Do not specify InitiatorGroup, PortalGroup, Malloc,
# or TargetNode entries here - the autotest.sh script
# will use RPC to set up this part of the configuration.
[Nvme]
NumControllers 1
[Malloc]
NumberOfLuns 1


@@ -38,6 +38,9 @@ fi
timing_enter fio
cp $testdir/iscsi.conf.in $testdir/iscsi.conf
$rootdir/scripts/gen_nvme.sh >> $testdir/iscsi.conf
# iSCSI target configuration
PORT=3260
RPC_PORT=5260
@@ -92,5 +95,6 @@ rm -f ./local-job0-0-verify.state
trap - SIGINT SIGTERM EXIT
iscsicleanup
rm -f $testdir/iscsi.conf
killprocess $pid
timing_exit fio


@@ -12,5 +12,3 @@
[Rpc]
Enable Yes
[Nvme]


@@ -1,5 +1,3 @@
[Nvme]
# autotest.sh will automatically rmmod ioatdma, so we do
# not need to specify Whitelist
# entries to enable ioat offload for this malloc LUN


@@ -10,6 +10,9 @@ testdir=$(readlink -f $(dirname $0))
timing_enter blockdev
cp $testdir/bdev.conf.in $testdir/bdev.conf
$rootdir/scripts/gen_nvme.sh >> $testdir/bdev.conf
timing_enter bounds
$testdir/bdevio/bdevio $testdir/bdev.conf
timing_exit bounds
@@ -34,4 +37,5 @@ if [ $RUN_NIGHTLY -eq 1 ]; then
timing_exit unmap
fi
rm -f $testdir/bdev.conf
timing_exit blockdev


@@ -39,6 +39,9 @@ qemu-img create -f qcow2 -o backing_file=$VM_IMG $VM_BAK_IMG
cp $testdir/spdk_vm_base.xml $testdir/spdk_vm.xml
cp $testdir/spdk_vnet_base.xml $testdir/spdk_vnet.xml
cp $testdir/vhost.conf.in $testdir/vhost.conf
$rootdir/scripts/gen_nvme.sh >> $testdir/vhost.conf
sed -i "s@<name></name>@<name>$VM_NAME</name>@g" $testdir/spdk_vm.xml
sed -i "s@source file=''@source file='$VM_BAK_IMG'@g" $testdir/spdk_vm.xml
sed -i "s@<emulator></emulator>@<emulator>$VM_QEMU</emulator>@g" $testdir/spdk_vm.xml
@@ -93,5 +96,6 @@ trap - SIGINT SIGTERM EXIT
cleanup_virsh
rm $testdir/spdk_vm.xml
rm $testdir/spdk_vnet.xml
rm $testdir/vhost.conf
killprocess $pid
timing_exit ext4test


@@ -38,9 +38,6 @@
NumberOfLuns 1
LunSizeInMb 512
[Nvme]
UnbindFromKernel Yes
[VhostScsi0]
Name naa.123
Dev 0 Nvme0n1


@@ -116,6 +116,7 @@ function spdk_vhost_run()
local vhost_log_file="$SPDK_VHOST_SCSI_TEST_DIR/vhost.log"
local vhost_pid_file="$SPDK_VHOST_SCSI_TEST_DIR/vhost.pid"
local vhost_socket="$SPDK_VHOST_SCSI_TEST_DIR/usvhost"
local vhost_conf_template="$BASE_DIR/vhost.conf.in"
local vhost_conf_file="$BASE_DIR/vhost.conf"
echo "INFO: starting vhost app in background"
[[ -r "$vhost_pid_file" ]] && spdk_vhost_kill
@@ -127,6 +128,9 @@ function spdk_vhost_run()
return 1
fi
cp $vhost_conf_template $vhost_conf_file
$BASE_DIR/../../../scripts/gen_nvme.sh >> $vhost_conf_file
local cmd="$vhost_app -m $(cat $BASE_DIR/autotest.config|grep vhost_reactor_mask|awk -F'=' '{print $2}') \
-p $(cat $BASE_DIR/autotest.config|grep vhost_master_core|awk -F'=' '{print $2}') \
-c $vhost_conf_file"
@@ -142,6 +146,8 @@ function spdk_vhost_run()
sleep 25
kill -0 $(cat $vhost_pid_file)
echo "INFO: vhost started - pid=$(cat $vhost_pid_file)"
rm $vhost_conf_file
}
function spdk_vhost_kill()


@@ -34,8 +34,5 @@
[Ioat]
Disable Yes
[Nvme]
ClaimAllDevices
[Split]
Split Nvme0n1 4