From efccac8c7e6b810bfceda8dd50bf97241e4f3c39 Mon Sep 17 00:00:00 2001
From: Daniel Verkamp
Date: Mon, 29 Aug 2016 15:00:32 -0700
Subject: [PATCH] bdev/nvme: remove NvmeLunsPerNs and LunSizeInMB

These were only intended for testing and should be replaced by a
virtual blockdev that can be layered on top of any kind of bdev.

Change-Id: I3ba2cc94630a6c6748d96e3401fee05aaabe20e0
Signed-off-by: Daniel Verkamp
---
 doc/iscsi/getting_started.md        |  16 +---
 etc/spdk/iscsi.conf.in              |   4 -
 lib/bdev/nvme/blockdev_nvme.c       | 141 +++++++++-------------------
 test/iscsi_tgt/ext4test/ext4test.sh |   2 +-
 test/iscsi_tgt/ext4test/iscsi.conf  |   2 -
 test/iscsi_tgt/fio/iscsi.conf       |   1 -
 test/lib/bdev/bdev.conf             |   1 -
 7 files changed, 48 insertions(+), 119 deletions(-)

diff --git a/doc/iscsi/getting_started.md b/doc/iscsi/getting_started.md
index a6bd4f74e..13756ad6b 100644
--- a/doc/iscsi/getting_started.md
+++ b/doc/iscsi/getting_started.md
@@ -111,16 +111,6 @@ the kernel to avoid interrupts and context switching.
   BDF 0000:00:00.0
   BDF 0000:01:00.0

-  # SPDK supports partitioning each nvme card into multiple LUNs
-  # through the NvmeLunsPerNs parameter. If NvmeLunsPerNs is specified,
-  # then the size of the nvme card is split up equally only if LunSizeinMB
-  # is not specified. For example, a 400GB NVMe namespace would be split
-  # into 4 LUNs, each 100GB in size. These LUNs could be presented
-  # individually (i.e. one LUN per TargetNode), or aggregated into a single
-  # target node as in the example above. Currently, maximal value supported
-  # by NvmeLunsPerNs is 256.
-  NvmeLunsPerNs 4
-
   # The number of attempts per I/O when an I/O fails. Do not include
   # this key to get the default behavior.
   NvmeRetryCount 4
@@ -130,11 +120,11 @@ the kernel to avoid interrupts and context switching.

 [TargetNodeX]
   # other TargetNode parameters go here (TargetName, Mapping, etc.)
-  # nvme with the following format: NvmeXnYpZ, where X = the controller ID,
-  # Y = the namespace ID, and Z = the partition ID
+  # nvme with the following format: NvmeXnY, where X = the controller ID
+  # and Y = the namespace ID
   # Note: NVMe namespace IDs always start at 1, not 0 - and most
   # controllers have only 1 namespace.
-  LUN0 Nvme0n1p0
+  LUN0 Nvme0n1
 ~~~

 You should make a copy of the example configuration file, modify it to suit your environment, and
diff --git a/etc/spdk/iscsi.conf.in b/etc/spdk/iscsi.conf.in
index b1dfdfd2a..6891ebf21 100644
--- a/etc/spdk/iscsi.conf.in
+++ b/etc/spdk/iscsi.conf.in
@@ -99,10 +99,6 @@
   BDF 0000:00:00.0 Nvme0
   BDF 0000:01:00.0 Nvme1

-  # The following two arguments allow the user to partition NVMe namespaces
-  # into multiple LUNs
-  NvmeLunsPerNs 1
-  LunSizeInMB 1024
   # The number of attempts per I/O when an I/O fails. Do not include
   # this key to get the default behavior.
   NvmeRetryCount 4
diff --git a/lib/bdev/nvme/blockdev_nvme.c b/lib/bdev/nvme/blockdev_nvme.c
index c7456cbc7..b3021373d 100644
--- a/lib/bdev/nvme/blockdev_nvme.c
+++ b/lib/bdev/nvme/blockdev_nvme.c
@@ -77,9 +77,6 @@ struct nvme_blockdev {
 	struct spdk_nvme_ctrlr *ctrlr;
 	struct nvme_device *dev;
 	struct spdk_nvme_ns *ns;
-	uint64_t lba_start;
-	uint64_t lba_end;
-	uint64_t blocklen;
 };

 struct nvme_io_channel {
@@ -109,9 +106,7 @@ enum data_direction {

 static struct nvme_blockdev g_blockdev[NVME_MAX_BLOCKDEVS];
 static int blockdev_index_max = 0;
-static int nvme_luns_per_ns = 1;
 static int nvme_controller_index = 0;
-static int lun_size_in_mb = 0;
 static int num_controllers = -1;
 static int g_reset_controller_on_timeout = 0;
 static int g_timeout = 0;
@@ -119,8 +114,7 @@ static int g_nvme_adminq_poll_timeout_us = 0;

 static TAILQ_HEAD(, nvme_device) g_nvme_devices = TAILQ_HEAD_INITIALIZER(g_nvme_devices);;

-static void nvme_ctrlr_initialize_blockdevs(struct nvme_device *nvme_dev,
-		int bdev_per_ns, int ctrlr_id);
+static void nvme_ctrlr_initialize_blockdevs(struct nvme_device *nvme_dev, int ctrlr_id);
 static int nvme_library_init(void);
 static void nvme_library_fini(void);
 static int nvme_queue_cmd(struct nvme_blockdev *bdev, struct spdk_nvme_qpair *qpair,
@@ -537,7 +531,7 @@ attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
 	spdk_pci_addr_parse(&dev->pci_addr, trid->traddr);
 	dev->id = nvme_controller_index++;

-	nvme_ctrlr_initialize_blockdevs(dev, nvme_luns_per_ns, dev->id);
+	nvme_ctrlr_initialize_blockdevs(dev, dev->id);
 	spdk_poller_register(&dev->adminq_timer_poller, blockdev_nvme_poll_adminq, ctrlr,
 			     spdk_app_get_current_core(), g_nvme_adminq_poll_timeout_us);

@@ -616,21 +610,6 @@ nvme_library_init(void)
 		return 0;
 	}

-	nvme_luns_per_ns = spdk_conf_section_get_intval(sp, "NvmeLunsPerNs");
-	if (nvme_luns_per_ns < 1)
-		nvme_luns_per_ns = 1;
-
-	if (nvme_luns_per_ns > NVME_MAX_BLOCKDEVS_PER_CONTROLLER) {
-		SPDK_ERRLOG("The input value nvme_luns_per_ns(%d) exceeds the maximal "
-			    "value(%d)\n", nvme_luns_per_ns, NVME_MAX_BLOCKDEVS_PER_CONTROLLER);
-		return -1;
-	}
-
-	lun_size_in_mb = spdk_conf_section_get_intval(sp, "LunSizeInMB");
-
-	if (lun_size_in_mb < 0)
-		lun_size_in_mb = 0;
-
 	spdk_nvme_retry_count = spdk_conf_section_get_intval(sp, "NvmeRetryCount");
 	if (spdk_nvme_retry_count < 0)
 		spdk_nvme_retry_count = SPDK_NVME_DEFAULT_RETRY_COUNT;
@@ -697,15 +676,13 @@ nvme_library_fini(void)
 }

 static void
-nvme_ctrlr_initialize_blockdevs(struct nvme_device *nvme_dev, int bdev_per_ns, int ctrlr_id)
+nvme_ctrlr_initialize_blockdevs(struct nvme_device *nvme_dev, int ctrlr_id)
 {
 	struct nvme_blockdev *bdev;
 	struct spdk_nvme_ctrlr *ctrlr = nvme_dev->ctrlr;
 	struct spdk_nvme_ns *ns;
 	const struct spdk_nvme_ctrlr_data *cdata;
-	uint64_t bdev_size, lba_offset, sectors_per_stripe;
-	int ns_id, num_ns, bdev_idx;
-	uint64_t lun_size_in_sector;
+	int ns_id, num_ns;

 	num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr);
 	cdata = spdk_nvme_ctrlr_get_data(ctrlr);
@@ -718,65 +695,42 @@ nvme_ctrlr_initialize_blockdevs(struct nvme_device *nvme_dev, int bdev_per_ns, i
 			continue;
 		}

-		bdev_size = spdk_nvme_ns_get_num_sectors(ns) / bdev_per_ns;
-
-		/*
-		 * Align each blockdev on a 1MB boundary - this helps cover Fultondale case
-		 * where I/O that span a 128KB boundary must be split for optimal performance.
-		 * Using a 1MB hardcoded boundary here so that we do not have to export
-		 * stripe size information from the NVMe driver for now.
-		 */
-		sectors_per_stripe = (1 << 20) / spdk_nvme_ns_get_sector_size(ns);
-
-		lun_size_in_sector = ((uint64_t)lun_size_in_mb << 20) / spdk_nvme_ns_get_sector_size(ns);
-		if ((lun_size_in_mb > 0) && (lun_size_in_sector < bdev_size))
-			bdev_size = lun_size_in_sector;
-
-		bdev_size &= ~(sectors_per_stripe - 1);
-
-		lba_offset = 0;
-		for (bdev_idx = 0; bdev_idx < bdev_per_ns; bdev_idx++) {
-			if (blockdev_index_max >= NVME_MAX_BLOCKDEVS)
-				return;
-
-			bdev = &g_blockdev[blockdev_index_max];
-			bdev->ctrlr = ctrlr;
-			bdev->dev = nvme_dev;
-			bdev->ns = ns;
-			bdev->lba_start = lba_offset;
-			bdev->lba_end = lba_offset + bdev_size - 1;
-			lba_offset += bdev_size;
-
-			snprintf(bdev->disk.name, SPDK_BDEV_MAX_NAME_LENGTH,
-				 "Nvme%dn%dp%d", ctrlr_id, spdk_nvme_ns_get_id(ns), bdev_idx);
-			snprintf(bdev->disk.product_name, SPDK_BDEV_MAX_PRODUCT_NAME_LENGTH,
-				 "NVMe disk");
-
-			if (cdata->oncs.dsm) {
-				/*
-				 * Enable the thin provisioning
-				 * if nvme controller supports
-				 * DataSet Management command.
-				 */
-				bdev->disk.thin_provisioning = 1;
-				bdev->disk.max_unmap_bdesc_count =
-					NVME_DEFAULT_MAX_UNMAP_BDESC_COUNT;
-			}
-
-			bdev->disk.write_cache = 0;
-			if (cdata->vwc.present) {
-				/* Enable if the Volatile Write Cache exists */
-				bdev->disk.write_cache = 1;
-			}
-			bdev->blocklen = spdk_nvme_ns_get_sector_size(ns);
-			bdev->disk.blocklen = bdev->blocklen;
-			bdev->disk.blockcnt = bdev->lba_end - bdev->lba_start + 1;
-			bdev->disk.ctxt = bdev;
-			bdev->disk.fn_table = &nvmelib_fn_table;
-			spdk_bdev_register(&bdev->disk);
-
-			blockdev_index_max++;
+		if (blockdev_index_max >= NVME_MAX_BLOCKDEVS) {
+			return;
 		}
+
+		bdev = &g_blockdev[blockdev_index_max];
+		bdev->ctrlr = ctrlr;
+		bdev->dev = nvme_dev;
+		bdev->ns = ns;
+
+		snprintf(bdev->disk.name, SPDK_BDEV_MAX_NAME_LENGTH,
+			 "Nvme%dn%d", ctrlr_id, spdk_nvme_ns_get_id(ns));
+		snprintf(bdev->disk.product_name, SPDK_BDEV_MAX_PRODUCT_NAME_LENGTH,
+			 "NVMe disk");
+
+		if (cdata->oncs.dsm) {
+			/*
+			 * Enable the thin provisioning
+			 * if nvme controller supports
+			 * DataSet Management command.
+			 */
+			bdev->disk.thin_provisioning = 1;
+			bdev->disk.max_unmap_bdesc_count = NVME_DEFAULT_MAX_UNMAP_BDESC_COUNT;
+		}
+
+		bdev->disk.write_cache = 0;
+		if (cdata->vwc.present) {
+			/* Enable if the Volatile Write Cache exists */
+			bdev->disk.write_cache = 1;
+		}
+		bdev->disk.blocklen = spdk_nvme_ns_get_sector_size(ns);
+		bdev->disk.blockcnt = spdk_nvme_ns_get_num_sectors(ns);
+		bdev->disk.ctxt = bdev;
+		bdev->disk.fn_table = &nvmelib_fn_table;
+		spdk_bdev_register(&bdev->disk);
+
+		blockdev_index_max++;
 	}
 }

@@ -840,8 +794,7 @@ nvme_queue_cmd(struct nvme_blockdev *bdev, struct spdk_nvme_qpair *qpair,
 {
 	uint32_t ss = spdk_nvme_ns_get_sector_size(bdev->ns);
 	uint32_t lba_count;
-	uint64_t relative_lba = offset / bdev->blocklen;
-	uint64_t next_lba = relative_lba + bdev->lba_start;
+	uint64_t lba = offset / bdev->disk.blocklen;
 	int rc;

 	if (nbytes % ss) {
@@ -858,11 +811,11 @@ nvme_queue_cmd(struct nvme_blockdev *bdev, struct spdk_nvme_qpair *qpair,
 	bio->iov_offset = 0;

 	if (direction == BDEV_DISK_READ) {
-		rc = spdk_nvme_ns_cmd_readv(bdev->ns, qpair, next_lba,
+		rc = spdk_nvme_ns_cmd_readv(bdev->ns, qpair, lba,
					    lba_count, queued_done, bio, 0,
					    queued_reset_sgl, queued_next_sge);
 	} else {
-		rc = spdk_nvme_ns_cmd_writev(bdev->ns, qpair, next_lba,
+		rc = spdk_nvme_ns_cmd_writev(bdev->ns, qpair, lba,
					    lba_count, queued_done, bio, 0,
					    queued_reset_sgl, queued_next_sge);
 	}
@@ -888,7 +841,7 @@ blockdev_nvme_unmap(struct nvme_blockdev *nbdev, struct spdk_io_channel *ch,
 	}

 	for (i = 0; i < bdesc_count; i++) {
-		dsm_range[i].starting_lba = nbdev->lba_start + from_be64(&unmap_d->lba);
+		dsm_range[i].starting_lba = from_be64(&unmap_d->lba);
 		dsm_range[i].length = from_be32(&unmap_d->block_count);
 		dsm_range[i].attributes.raw = 0;
 		unmap_d++;
@@ -910,16 +863,10 @@ blockdev_nvme_get_spdk_running_config(FILE *fp)
 {
 	fprintf(fp,
 		"\n"
-		"# Users may change this to partition an NVMe namespace into multiple LUNs.\n"
-		"[Nvme]\n"
-		"  NvmeLunsPerNs %d\n",
-		nvme_luns_per_ns);
+		"[Nvme]\n");
 	if (num_controllers != -1) {
 		fprintf(fp, "  NumControllers %d\n", num_controllers);
 	}
-	if (lun_size_in_mb != 0) {
-		fprintf(fp, "  LunSizeInMB %d\n", lun_size_in_mb);
-	}
 	fprintf(fp,
 		"  # Set how often the admin queue is polled for asynchronous events.\n"
 		"  # Units in microseconds.\n"
 		"  AdminPollRate %d\n", g_nvme_adminq_poll_timeout_us);
diff --git a/test/iscsi_tgt/ext4test/ext4test.sh b/test/iscsi_tgt/ext4test/ext4test.sh
index 7b4adcd39..383c2edf8 100755
--- a/test/iscsi_tgt/ext4test/ext4test.sh
+++ b/test/iscsi_tgt/ext4test/ext4test.sh
@@ -48,7 +48,7 @@ $rpc_py add_initiator_group $INITIATOR_TAG $INITIATOR_NAME $NETMASK
 # "64" ==> iSCSI queue depth 64
 # "1 0 0 0" ==> disable CHAP authentication
 if [ -z "$NO_NVME" ]; then
-$rpc_py construct_target_node Target0 Target0_alias Nvme0n1p0:0 1:2 64 1 0 0 0
+$rpc_py construct_target_node Target0 Target0_alias Nvme0n1:0 1:2 64 1 0 0 0
 fi
 $rpc_py construct_target_node Target1 Target1_alias Malloc0:0 1:2 64 1 0 0 0

diff --git a/test/iscsi_tgt/ext4test/iscsi.conf b/test/iscsi_tgt/ext4test/iscsi.conf
index 027a8d2c5..a027c9cec 100644
--- a/test/iscsi_tgt/ext4test/iscsi.conf
+++ b/test/iscsi_tgt/ext4test/iscsi.conf
@@ -45,8 +45,6 @@
 # will use RPC to set up this part of the configuration.
 [Nvme]
   NumControllers 1
-  NvmeLunsPerNs 1
-  LunSizeInMB 4096

 [Malloc]
   NumberOfLuns 1
diff --git a/test/iscsi_tgt/fio/iscsi.conf b/test/iscsi_tgt/fio/iscsi.conf
index fd9430e75..a112fa221 100644
--- a/test/iscsi_tgt/fio/iscsi.conf
+++ b/test/iscsi_tgt/fio/iscsi.conf
@@ -14,4 +14,3 @@
   Enable Yes

 [Nvme]
-  NvmeLunsPerNs 1
diff --git a/test/lib/bdev/bdev.conf b/test/lib/bdev/bdev.conf
index 73cf65436..01c9e517a 100644
--- a/test/lib/bdev/bdev.conf
+++ b/test/lib/bdev/bdev.conf
@@ -1,5 +1,4 @@
 [Nvme]
-  NvmeLunsPerNs 1

 # autotest.sh will automatically rmmod ioatdma, so we do
 # not need to specify Whitelist
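
For reference, a minimal post-change configuration fragment is sketched below. The [Nvme] keys and the Nvme0n1 bdev name come directly from the hunks above; the target-node keys and values (TargetName, Mapping, the group names) are illustrative placeholders rather than anything this patch defines.

~~~
[Nvme]
  # Claim one controller as Nvme0; its first namespace is exposed as the
  # bdev Nvme0n1 (the old NvmeXnYpZ partition names no longer exist).
  BDF 0000:00:00.0 Nvme0
  NvmeRetryCount 4

[TargetNode1]
  # Placeholder target-node settings, for illustration only.
  TargetName disk1
  Mapping PortalGroup1 InitiatorGroup1
  LUN0 Nvme0n1
~~~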