diff --git a/CHANGELOG.md b/CHANGELOG.md index c83e04ef2..f3970c653 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ An [fio](http://github.com/axboe/fio) plugin was added that can route I/O to the bdev layer. See the [plugin documentation](https://github.com/spdk/spdk/blob/master/examples/bdev/fio_plugin/README.md) for more information. +spdk_bdev_unmap() was modified to take an offset and a length in bytes as +arguments instead of requiring the user to provide an array of SCSI +unmap descriptors. This limits unmaps to a single contiguous range. + ## v17.07: Build system improvements, userspace vhost-blk target, and GPT bdev ### Build System diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h index 461c61aff..a0f816cfb 100644 --- a/include/spdk/bdev.h +++ b/include/spdk/bdev.h @@ -210,14 +210,6 @@ uint32_t spdk_bdev_get_block_size(const struct spdk_bdev *bdev); */ uint64_t spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev); -/** - * Get maximum number of descriptors per unmap request. - * - * \param bdev Block device to query. - * \return Maximum number of unmap descriptors per request. - */ -uint32_t spdk_bdev_get_max_unmap_descriptors(const struct spdk_bdev *bdev); - /** * Get minimum I/O buffer address alignment for a bdev. * @@ -344,8 +336,8 @@ int spdk_bdev_writev(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, * * \param bdev Block device * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). - * \param unmap_d An array of unmap descriptors. - * \param bdesc_count The number of elements in unmap_d. + * \param offset The offset, in bytes, from the start of the block device. + * \param nbytes The number of bytes to unmap. Must be a multiple of the block size. * \param cb Called when the request is complete. * \param cb_arg Argument passed to cb. * @@ -354,8 +346,7 @@ int spdk_bdev_writev(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, * negated errno on failure, in which case the callback will not be called. */ int spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, - struct spdk_scsi_unmap_bdesc *unmap_d, - uint16_t bdesc_count, + uint64_t offset, uint64_t nbytes, spdk_bdev_io_completion_cb cb, void *cb_arg); /** diff --git a/include/spdk/blob.h b/include/spdk/blob.h index 94965f83a..86823b5ec 100644 --- a/include/spdk/blob.h +++ b/include/spdk/blob.h @@ -94,12 +94,6 @@ struct spdk_bs_dev_cb_args { spdk_bs_dev_cpl cb_fn; struct spdk_io_channel *channel; void *cb_arg; - /* - * Blobstore device implementations can use this for scratch space for any data - * structures needed to translate the function arguments to the required format - * for the backing store. - */ - uint8_t scratch[32]; }; struct spdk_bs_dev { diff --git a/include/spdk_internal/bdev.h b/include/spdk_internal/bdev.h index a56c31665..8e5b2dc4b 100644 --- a/include/spdk_internal/bdev.h +++ b/include/spdk_internal/bdev.h @@ -200,9 +200,6 @@ struct spdk_bdev { /** function table for all LUN ops */ const struct spdk_bdev_fn_table *fn_table; - /** Represents maximum unmap block descriptor count */ - uint32_t max_unmap_bdesc_count; - /** generation value used by block device reset */ uint32_t gencnt; @@ -299,11 +296,11 @@ struct spdk_bdev_io { uint64_t offset; } write; struct { - /** Represents the unmap block descriptors. */ - struct spdk_scsi_unmap_bdesc *unmap_bdesc; + /** Total size of region to be unmapped. */ + uint64_t len; - /** Count of unmap block descriptors. */ - uint16_t bdesc_count; + /** Starting offset (in bytes) of the bdev for this I/O. */ + uint64_t offset; } unmap; struct { /** Represents starting offset in bytes of the range to be flushed. */ diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index 3f3ff460f..3cc2e4da4 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -722,12 +722,6 @@ spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev) return bdev->blockcnt; } -uint32_t -spdk_bdev_get_max_unmap_descriptors(const struct spdk_bdev *bdev) -{ - return bdev->max_unmap_bdesc_count; -} - size_t spdk_bdev_get_buf_align(const struct spdk_bdev *bdev) { @@ -931,8 +925,7 @@ spdk_bdev_writev(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, int spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, - struct spdk_scsi_unmap_bdesc *unmap_d, - uint16_t bdesc_count, + uint64_t offset, uint64_t nbytes, spdk_bdev_io_completion_cb cb, void *cb_arg) { struct spdk_bdev *bdev = desc->bdev; @@ -944,14 +937,12 @@ spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, return -EBADF; } - if (bdesc_count == 0) { - SPDK_ERRLOG("Invalid bdesc_count 0\n"); + if (spdk_bdev_io_valid(bdev, offset, nbytes) != 0) { return -EINVAL; } - if (bdesc_count > bdev->max_unmap_bdesc_count) { - SPDK_ERRLOG("Invalid bdesc_count %u > max_unmap_bdesc_count %u\n", - bdesc_count, bdev->max_unmap_bdesc_count); + if (nbytes == 0) { + SPDK_ERRLOG("Can't unmap 0 bytes\n"); return -EINVAL; } @@ -963,8 +954,8 @@ spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, bdev_io->ch = channel; bdev_io->type = SPDK_BDEV_IO_TYPE_UNMAP; - bdev_io->u.unmap.unmap_bdesc = unmap_d; - bdev_io->u.unmap.bdesc_count = bdesc_count; + bdev_io->u.unmap.offset = offset; + bdev_io->u.unmap.len = nbytes; spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); rc = spdk_bdev_io_submit(bdev_io); diff --git a/lib/bdev/error/vbdev_error.c b/lib/bdev/error/vbdev_error.c index 871090187..0ef9b5c99 100644 --- a/lib/bdev/error/vbdev_error.c +++ b/lib/bdev/error/vbdev_error.c @@ -280,7 +280,6 @@ spdk_vbdev_error_create(struct spdk_bdev *base_bdev) disk->disk.blockcnt = base_bdev->blockcnt; disk->disk.blocklen = base_bdev->blocklen; disk->disk.write_cache = base_bdev->write_cache; - disk->disk.max_unmap_bdesc_count = base_bdev->max_unmap_bdesc_count; disk->disk.product_name = "Error Injection Disk"; disk->disk.ctxt = disk; disk->disk.fn_table = &vbdev_error_fn_table; diff --git a/lib/bdev/gpt/vbdev_gpt.c b/lib/bdev/gpt/vbdev_gpt.c index ca2aa2db8..93d1a4e77 100644 --- a/lib/bdev/gpt/vbdev_gpt.c +++ b/lib/bdev/gpt/vbdev_gpt.c @@ -171,14 +171,7 @@ gpt_write(struct gpt_partition_disk *gpt_partition_disk, struct spdk_bdev_io *bd static void gpt_unmap(struct gpt_partition_disk *gpt_partition_disk, struct spdk_bdev_io *bdev_io) { - uint16_t i; - uint64_t lba; - - for (i = 0; i < bdev_io->u.unmap.bdesc_count; i++) { - lba = from_be64(&bdev_io->u.unmap.unmap_bdesc[i].lba); - lba += gpt_partition_disk->offset_blocks; - to_be64(&bdev_io->u.unmap.unmap_bdesc[i].lba, lba); - } + bdev_io->u.unmap.offset += gpt_partition_disk->offset_bytes; } static void @@ -398,7 +391,6 @@ vbdev_gpt_create_bdevs(struct spdk_gpt_bdev *gpt_bdev) d->disk.blocklen = base_bdev->blocklen; d->disk.write_cache = base_bdev->write_cache; d->disk.need_aligned_buffer = base_bdev->need_aligned_buffer; - d->disk.max_unmap_bdesc_count = base_bdev->max_unmap_bdesc_count; /* index start at 1 instead of 0 to match the existing style */ d->disk.name = spdk_sprintf_alloc("%sp%" PRIu64, spdk_bdev_get_name(base_bdev), i + 1); diff --git a/lib/bdev/malloc/bdev_malloc.c b/lib/bdev/malloc/bdev_malloc.c index 4520a5d66..bfe79ca1e 100644 --- a/lib/bdev/malloc/bdev_malloc.c +++ b/lib/bdev/malloc/bdev_malloc.c @@ -46,8 +46,6 @@ #include "spdk_internal/bdev.h" #include "spdk_internal/log.h" -#define MALLOC_MAX_UNMAP_BDESC 1 - struct malloc_disk { struct spdk_bdev disk; void *malloc_buf; @@ -235,24 +233,14 @@ static int bdev_malloc_unmap(struct malloc_disk *mdisk, struct spdk_io_channel *ch, struct malloc_task *task, - struct spdk_scsi_unmap_bdesc *unmap_d, - uint16_t bdesc_count) + uint64_t offset, + uint64_t byte_count) { - uint64_t lba, offset, byte_count; + uint64_t lba; uint32_t block_count; - assert(bdesc_count <= MALLOC_MAX_UNMAP_BDESC); - - /* - * For now, only support a single unmap descriptor per command. The copy engine API does not - * support batch submission of operations. - */ - assert(bdesc_count == 1); - - lba = from_be64(&unmap_d[0].lba); - offset = lba * mdisk->disk.blocklen; - block_count = from_be32(&unmap_d[0].block_count); - byte_count = (uint64_t)block_count * mdisk->disk.blocklen; + lba = offset / mdisk->disk.blocklen; + block_count = byte_count / mdisk->disk.blocklen; if (lba >= mdisk->disk.blockcnt || block_count > mdisk->disk.blockcnt - lba) { return -1; @@ -330,8 +318,8 @@ static int _bdev_malloc_submit_request(struct spdk_io_channel *ch, struct spdk_b return bdev_malloc_unmap((struct malloc_disk *)bdev_io->bdev->ctxt, ch, (struct malloc_task *)bdev_io->driver_ctx, - bdev_io->u.unmap.unmap_bdesc, - bdev_io->u.unmap.bdesc_count); + bdev_io->u.unmap.offset, + bdev_io->u.unmap.len); default: return -1; } @@ -418,7 +406,6 @@ struct spdk_bdev *create_malloc_disk(uint64_t num_blocks, uint32_t block_size) mdisk->disk.write_cache = 1; mdisk->disk.blocklen = block_size; mdisk->disk.blockcnt = num_blocks; - mdisk->disk.max_unmap_bdesc_count = MALLOC_MAX_UNMAP_BDESC; mdisk->disk.ctxt = mdisk; mdisk->disk.fn_table = &malloc_fn_table; diff --git a/lib/bdev/null/bdev_null.c b/lib/bdev/null/bdev_null.c index 9fcc6c15e..5928aba75 100644 --- a/lib/bdev/null/bdev_null.c +++ b/lib/bdev/null/bdev_null.c @@ -154,7 +154,6 @@ create_null_bdev(const char *name, uint64_t num_blocks, uint32_t block_size) bdev->bdev.write_cache = 0; bdev->bdev.blocklen = block_size; bdev->bdev.blockcnt = num_blocks; - bdev->bdev.max_unmap_bdesc_count = 0; bdev->bdev.ctxt = bdev; bdev->bdev.fn_table = &null_fn_table; diff --git a/lib/bdev/nvme/bdev_nvme.c b/lib/bdev/nvme/bdev_nvme.c index ce3805af5..45332b691 100644 --- a/lib/bdev/nvme/bdev_nvme.c +++ b/lib/bdev/nvme/bdev_nvme.c @@ -84,7 +84,6 @@ struct nvme_io_channel { uint64_t end_ticks; }; -#define NVME_DEFAULT_MAX_UNMAP_BDESC_COUNT 1 struct nvme_bdev_io { /** array of iovecs to transfer. */ struct iovec *iovs; @@ -333,8 +332,8 @@ bdev_nvme_reset(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio) static int bdev_nvme_unmap(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, struct nvme_bdev_io *bio, - struct spdk_scsi_unmap_bdesc *umap_d, - uint16_t bdesc_count); + uint64_t offset, + uint64_t nbytes); static void bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) @@ -381,8 +380,8 @@ _bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_ return bdev_nvme_unmap((struct nvme_bdev *)bdev_io->bdev->ctxt, ch, (struct nvme_bdev_io *)bdev_io->driver_ctx, - bdev_io->u.unmap.unmap_bdesc, - bdev_io->u.unmap.bdesc_count); + bdev_io->u.unmap.offset, + bdev_io->u.unmap.len); case SPDK_BDEV_IO_TYPE_RESET: return bdev_nvme_reset((struct nvme_bdev *)bdev_io->bdev->ctxt, @@ -1108,15 +1107,6 @@ nvme_ctrlr_create_bdevs(struct nvme_ctrlr *nvme_ctrlr) } bdev->disk.product_name = "NVMe disk"; - if (cdata->oncs.dsm) { - /* - * Enable the thin provisioning - * if nvme controller supports - * DataSet Management command. - */ - bdev->disk.max_unmap_bdesc_count = NVME_DEFAULT_MAX_UNMAP_BDESC_COUNT; - } - bdev->disk.write_cache = 0; if (cdata->vwc.present) { /* Enable if the Volatile Write Cache exists */ @@ -1247,27 +1237,24 @@ bdev_nvme_queue_cmd(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, static int bdev_nvme_unmap(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, struct nvme_bdev_io *bio, - struct spdk_scsi_unmap_bdesc *unmap_d, - uint16_t bdesc_count) + uint64_t offset, + uint64_t nbytes) { struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); - int rc = 0, i; - struct spdk_nvme_dsm_range dsm_range[NVME_DEFAULT_MAX_UNMAP_BDESC_COUNT]; + int rc = 0; + struct spdk_nvme_dsm_range dsm_range = {}; - if (bdesc_count > NVME_DEFAULT_MAX_UNMAP_BDESC_COUNT) { + if (nbytes % nbdev->disk.blocklen) { + SPDK_ERRLOG("Unaligned IO request length\n"); return -EINVAL; } - for (i = 0; i < bdesc_count; i++) { - dsm_range[i].starting_lba = from_be64(&unmap_d->lba); - dsm_range[i].length = from_be32(&unmap_d->block_count); - dsm_range[i].attributes.raw = 0; - unmap_d++; - } + dsm_range.starting_lba = offset / nbdev->disk.blocklen; + dsm_range.length = nbytes / nbdev->disk.blocklen; rc = spdk_nvme_ns_cmd_dataset_management(nbdev->ns, nvme_ch->qpair, SPDK_NVME_DSM_ATTR_DEALLOCATE, - dsm_range, bdesc_count, + &dsm_range, 1, bdev_nvme_queued_done, bio); return rc; diff --git a/lib/bdev/split/vbdev_split.c b/lib/bdev/split/vbdev_split.c index e5bcf5d11..0e14a1f40 100644 --- a/lib/bdev/split/vbdev_split.c +++ b/lib/bdev/split/vbdev_split.c @@ -83,14 +83,7 @@ split_write(struct split_disk *split_disk, struct spdk_bdev_io *bdev_io) static void split_unmap(struct split_disk *split_disk, struct spdk_bdev_io *bdev_io) { - uint16_t i; - uint64_t lba; - - for (i = 0; i < bdev_io->u.unmap.bdesc_count; i++) { - lba = from_be64(&bdev_io->u.unmap.unmap_bdesc[i].lba); - lba += split_disk->offset_blocks; - to_be64(&bdev_io->u.unmap.unmap_bdesc[i].lba, lba); - } + bdev_io->u.unmap.offset += split_disk->offset_bytes; } static void @@ -333,7 +326,6 @@ vbdev_split_create(struct spdk_bdev *base_bdev, uint64_t split_count, uint64_t s d->disk.blocklen = base_bdev->blocklen; d->disk.write_cache = base_bdev->write_cache; d->disk.need_aligned_buffer = base_bdev->need_aligned_buffer; - d->disk.max_unmap_bdesc_count = base_bdev->max_unmap_bdesc_count; /* Append partition number to the base bdev's name, e.g. Malloc0 -> Malloc0p0 */ d->disk.name = spdk_sprintf_alloc("%sp%" PRIu64, spdk_bdev_get_name(base_bdev), i); diff --git a/lib/blob/bdev/blob_bdev.c b/lib/blob/bdev/blob_bdev.c index d686019e4..44b95ff34 100644 --- a/lib/blob/bdev/blob_bdev.c +++ b/lib/blob/bdev/blob_bdev.c @@ -107,17 +107,12 @@ static void bdev_blob_unmap(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args) { - struct spdk_scsi_unmap_bdesc *desc; + struct spdk_bdev *bdev = __get_bdev(dev); int rc; + uint32_t block_size = spdk_bdev_get_block_size(bdev); - SPDK_STATIC_ASSERT(sizeof(cb_args->scratch) >= sizeof(*desc), "scratch too small"); - - desc = (struct spdk_scsi_unmap_bdesc *)cb_args->scratch; - to_be64(&desc->lba, lba); - to_be32(&desc->block_count, lba_count); - desc->reserved = 0; - - rc = spdk_bdev_unmap(__get_desc(dev), channel, desc, 1, bdev_blob_io_complete, cb_args); + rc = spdk_bdev_unmap(__get_desc(dev), channel, lba * block_size, lba_count * block_size, + bdev_blob_io_complete, cb_args); if (rc) { cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, rc); } diff --git a/lib/nbd/nbd.c b/lib/nbd/nbd.c index dfeb2f21a..ed014ab11 100644 --- a/lib/nbd/nbd.c +++ b/lib/nbd/nbd.c @@ -58,8 +58,6 @@ struct nbd_io { struct nbd_reply resp; bool resp_in_progress; - struct spdk_scsi_unmap_bdesc unmap; - /* * Tracks current progress on reading/writing a request, * response, or payload from the nbd socket. @@ -176,9 +174,8 @@ nbd_submit_bdev_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, io->payload_size, nbd_io_done, io); break; case SPDK_BDEV_IO_TYPE_UNMAP: - to_be64(&io->unmap.lba, from_be64(&io->req.from) / spdk_bdev_get_block_size(bdev)); - to_be32(&io->unmap.block_count, io->payload_size / spdk_bdev_get_block_size(bdev)); - rc = spdk_bdev_unmap(desc, ch, &io->unmap, 1, nbd_io_done, io); + rc = spdk_bdev_unmap(desc, ch, from_be64(&io->req.from), + io->payload_size, nbd_io_done, io); break; case SPDK_BDEV_IO_TYPE_FLUSH: rc = spdk_bdev_flush(desc, ch, 0, spdk_bdev_get_num_blocks(bdev) * spdk_bdev_get_block_size(bdev), diff --git a/lib/nvmf/ctrlr_bdev.c b/lib/nvmf/ctrlr_bdev.c index dbc43f4e6..a3b458a78 100644 --- a/lib/nvmf/ctrlr_bdev.c +++ b/lib/nvmf/ctrlr_bdev.c @@ -129,13 +129,8 @@ nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success, { struct spdk_nvmf_request *req = cb_arg; struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; - struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; int sc, sct; - if (cmd->opc == SPDK_NVME_OPC_DATASET_MANAGEMENT) { - free(req->unmap_bdesc); - } - spdk_bdev_io_get_nvme_status(bdev_io, &sc, &sct); response->status.sc = sc; response->status.sct = sct; @@ -500,17 +495,44 @@ nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; } +struct nvmf_virtual_ctrlr_unmap { + struct spdk_nvmf_request *req; + uint32_t count; +}; + +static void +nvmf_virtual_ctrlr_dsm_cpl(struct spdk_bdev_io *bdev_io, bool success, + void *cb_arg) +{ + struct nvmf_virtual_ctrlr_unmap *unmap_ctx = cb_arg; + struct spdk_nvmf_request *req = unmap_ctx->req; + struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; + int sc, sct; + + unmap_ctx->count--; + + if (response->status.sct == SPDK_NVME_SCT_GENERIC && + response->status.sc == SPDK_NVME_SC_SUCCESS) { + spdk_bdev_io_get_nvme_status(bdev_io, &sc, &sct); + response->status.sc = sc; + response->status.sct = sct; + } + + if (unmap_ctx->count == 0) { + spdk_nvmf_request_complete(req); + spdk_bdev_free_io(bdev_io); + free(unmap_ctx); + } +} + static int nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, struct spdk_nvmf_request *req) { - int i; uint32_t attribute; - uint16_t nr; - struct spdk_scsi_unmap_bdesc *unmap; + uint16_t nr, i; struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd; struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl; - bool async = false; nr = ((cmd->cdw10 & 0x000000ff) + 1); if (nr * sizeof(struct spdk_nvme_dsm_range) > req->length) { @@ -521,30 +543,49 @@ nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, attribute = cmd->cdw11 & 0x00000007; if (attribute & SPDK_NVME_DSM_ATTR_DEALLOCATE) { - struct spdk_nvme_dsm_range *dsm_range = (struct spdk_nvme_dsm_range *)req->data; - unmap = calloc(nr, sizeof(*unmap)); - if (unmap == NULL) { - SPDK_ERRLOG("memory allocation failure\n"); + struct nvmf_virtual_ctrlr_unmap *unmap_ctx; + struct spdk_nvme_dsm_range *dsm_range; + uint64_t lba; + uint32_t lba_count; + uint32_t block_size = spdk_bdev_get_block_size(bdev); + + unmap_ctx = calloc(1, sizeof(*unmap_ctx)); + if (!unmap_ctx) { response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; } + unmap_ctx->req = req; + + response->status.sct = SPDK_NVME_SCT_GENERIC; + response->status.sc = SPDK_NVME_SC_SUCCESS; + + dsm_range = (struct spdk_nvme_dsm_range *)req->data; for (i = 0; i < nr; i++) { - to_be64(&unmap[i].lba, dsm_range[i].starting_lba); - to_be32(&unmap[i].block_count, dsm_range[i].length); + lba = dsm_range[i].starting_lba; + lba_count = dsm_range[i].length; + + unmap_ctx->count++; + + if (spdk_bdev_unmap(desc, ch, lba * block_size, lba_count * block_size, + nvmf_virtual_ctrlr_dsm_cpl, unmap_ctx)) { + response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + unmap_ctx->count--; + /* We can't return here - we may have to wait for any other + * unmaps already sent to complete */ + break; + } } - if (spdk_bdev_unmap(desc, ch, unmap, nr, nvmf_bdev_ctrlr_complete_cmd, req)) { - free(unmap); - response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; + + if (unmap_ctx->count == 0) { + free(unmap_ctx); return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; } - req->unmap_bdesc = unmap; - async = true; - } - if (async) { return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS; } + + response->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE; } diff --git a/lib/nvmf/request.h b/lib/nvmf/request.h index e1a77e616..341188497 100644 --- a/lib/nvmf/request.h +++ b/lib/nvmf/request.h @@ -65,7 +65,6 @@ struct spdk_nvmf_request { void *data; union nvmf_h2c_msg *cmd; union nvmf_c2h_msg *rsp; - struct spdk_scsi_unmap_bdesc *unmap_bdesc; }; int diff --git a/lib/scsi/scsi.c b/lib/scsi/scsi.c index 2b9ffbb19..33bcf8a01 100644 --- a/lib/scsi/scsi.c +++ b/lib/scsi/scsi.c @@ -37,7 +37,6 @@ #include "spdk/conf.h" #define DEFAULT_MAX_UNMAP_LBA_COUNT 4194304 -#define DEFAULT_MAX_UNMAP_BLOCK_DESCRIPTOR_COUNT 1 #define DEFAULT_OPTIMAL_UNMAP_GRANULARITY 0 #define DEFAULT_UNMAP_GRANULARITY_ALIGNMENT 0 #define DEFAULT_UGAVALID 0 @@ -49,8 +48,6 @@ static void spdk_set_default_scsi_parameters(void) { g_spdk_scsi.scsi_params.max_unmap_lba_count = DEFAULT_MAX_UNMAP_LBA_COUNT; - g_spdk_scsi.scsi_params.max_unmap_block_descriptor_count = - DEFAULT_MAX_UNMAP_BLOCK_DESCRIPTOR_COUNT; g_spdk_scsi.scsi_params.optimal_unmap_granularity = DEFAULT_OPTIMAL_UNMAP_GRANULARITY; g_spdk_scsi.scsi_params.unmap_granularity_alignment = @@ -75,9 +72,6 @@ spdk_read_config_scsi_parameters(void) g_spdk_scsi.scsi_params.max_unmap_lba_count = (val == NULL) ? DEFAULT_MAX_UNMAP_LBA_COUNT : strtoul(val, NULL, 10); - val = spdk_conf_section_get_val(sp, "MaxUnmapBlockDescriptorCount"); - g_spdk_scsi.scsi_params.max_unmap_block_descriptor_count = (val == NULL) ? - DEFAULT_MAX_UNMAP_BLOCK_DESCRIPTOR_COUNT : strtoul(val, NULL, 10); val = spdk_conf_section_get_val(sp, "OptimalUnmapGranularity"); g_spdk_scsi.scsi_params.optimal_unmap_granularity = (val == NULL) ? DEFAULT_OPTIMAL_UNMAP_GRANULARITY : strtoul(val, NULL, 10); diff --git a/lib/scsi/scsi_bdev.c b/lib/scsi/scsi_bdev.c index a91699f94..2d5f2d552 100644 --- a/lib/scsi/scsi_bdev.c +++ b/lib/scsi/scsi_bdev.c @@ -54,6 +54,7 @@ #define DEFAULT_DISK_REVISION "0001" #define DEFAULT_DISK_ROTATION_RATE 1 /* Non-rotating medium */ #define DEFAULT_DISK_FORM_FACTOR 0x02 /* 3.5 inch */ +#define DEFAULT_MAX_UNMAP_BLOCK_DESCRIPTOR_COUNT 256 #define INQUIRY_OFFSET(field) offsetof(struct spdk_scsi_cdb_inquiry_data, field) + \ sizeof(((struct spdk_scsi_cdb_inquiry_data *)0x0)->field) @@ -554,8 +555,6 @@ spdk_bdev_scsi_inquiry(struct spdk_bdev *bdev, struct spdk_scsi_task *task, len = 20 - hlen; if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) { - uint32_t max_unmap_desc; - /* * MAXIMUM UNMAP LBA COUNT: indicates the * maximum number of LBAs that may be @@ -569,10 +568,10 @@ spdk_bdev_scsi_inquiry(struct spdk_bdev *bdev, struct spdk_scsi_task *task, * block descriptors that shall be contained * in the parameter data transferred to the * device server for an UNMAP command. + * The bdev layer automatically splits unmap + * requests, so pick an arbitrary high number here. */ - max_unmap_desc = spdk_min(spdk_bdev_get_max_unmap_descriptors(bdev), - g_spdk_scsi.scsi_params.max_unmap_block_descriptor_count); - to_be32(&data[24], max_unmap_desc); + to_be32(&data[24], DEFAULT_MAX_UNMAP_BLOCK_DESCRIPTOR_COUNT); /* * OPTIMAL UNMAP GRANULARITY: indicates the @@ -1484,73 +1483,153 @@ spdk_bdev_scsi_readwrite(struct spdk_bdev *bdev, } } +struct spdk_bdev_scsi_unmap_ctx { + struct spdk_scsi_task *task; + struct spdk_scsi_unmap_bdesc desc[DEFAULT_MAX_UNMAP_BLOCK_DESCRIPTOR_COUNT]; + uint32_t count; +}; + +static void +spdk_bdev_scsi_task_complete_unmap_cmd(struct spdk_bdev_io *bdev_io, bool success, + void *cb_arg) +{ + struct spdk_bdev_scsi_unmap_ctx *ctx = cb_arg; + struct spdk_scsi_task *task = ctx->task; + int sc, sk, asc, ascq; + + ctx->count--; + + task->bdev_io = bdev_io; + + if (task->status == SPDK_SCSI_STATUS_GOOD) { + spdk_bdev_io_get_scsi_status(bdev_io, &sc, &sk, &asc, &ascq); + spdk_scsi_task_set_status(task, sc, sk, asc, ascq); + } + + if (ctx->count == 0) { + spdk_scsi_lun_complete_task(task->lun, task); + free(ctx); + } +} + +static int +__copy_desc(struct spdk_bdev_scsi_unmap_ctx *ctx, uint8_t *data, size_t data_len) +{ + uint16_t desc_data_len; + uint16_t desc_count; + + if (!data) { + return -EINVAL; + } + + if (data_len < 8) { + /* We can't even get the reported length, so fail. */ + return -EINVAL; + } + + desc_data_len = from_be16(&data[2]); + desc_count = desc_data_len / 16; + + if (desc_data_len > (data_len - 8)) { + SPDK_ERRLOG("Error - desc_data_len (%u) > data_len (%lu) - 8", + desc_data_len, data_len); + return -EINVAL; + } + + if (desc_count > DEFAULT_MAX_UNMAP_BLOCK_DESCRIPTOR_COUNT) { + SPDK_ERRLOG("desc_count (%u) greater than max allowed (%u)\n", + desc_count, DEFAULT_MAX_UNMAP_BLOCK_DESCRIPTOR_COUNT); + return -EINVAL; + } + + memcpy(ctx->desc, &data[8], desc_data_len); + return desc_count; +} + static int spdk_bdev_scsi_unmap(struct spdk_bdev *bdev, struct spdk_scsi_task *task) { - uint8_t *data; - struct spdk_scsi_unmap_bdesc *desc; - uint32_t bdesc_count, max_unmap_bdesc_count; - int bdesc_data_len; - int data_len; - int rc; + uint8_t *data; + struct spdk_bdev_scsi_unmap_ctx *ctx; + int desc_count, i; + int data_len; + int rc; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) { + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_NO_SENSE, + SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + return SPDK_SCSI_TASK_COMPLETE; + } + + ctx->task = task; + ctx->count = 0; if (task->iovcnt == 1) { data = (uint8_t *)task->iovs[0].iov_base; data_len = task->iovs[0].iov_len; + desc_count = __copy_desc(ctx, data, data_len); } else { data = spdk_scsi_task_gather_data(task, &data_len); + desc_count = __copy_desc(ctx, data, data_len); + if (desc_count < 0) { + spdk_dma_free(data); + } } - /* - * The UNMAP BLOCK DESCRIPTOR DATA LENGTH field specifies the length in - * bytes of the UNMAP block descriptors that are available to be - * transferred from the Data-Out Buffer. The unmap block descriptor data - * length should be a multiple of 16. If the unmap block descriptor data - * length is not a multiple of 16, then the last unmap block descriptor - * is incomplete and shall be ignored. - */ - bdesc_data_len = from_be16(&data[2]); - bdesc_count = bdesc_data_len / 16; - assert(bdesc_data_len <= data_len); - - if (task->iovcnt == 1) { - desc = (struct spdk_scsi_unmap_bdesc *)&data[8]; - } else { - desc = spdk_scsi_task_alloc_data(task, bdesc_data_len - 8); - memcpy(desc, &data[8], bdesc_data_len - 8); - spdk_dma_free(data); - } - - max_unmap_bdesc_count = spdk_bdev_get_max_unmap_descriptors(bdev); - if (bdesc_count > max_unmap_bdesc_count) { - SPDK_ERRLOG("Error - supported unmap block descriptor count limit" - " is %u\n", max_unmap_bdesc_count); - spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, - SPDK_SCSI_SENSE_NO_SENSE, - SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE, - SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - return SPDK_SCSI_TASK_COMPLETE; - } else if (bdesc_data_len > data_len) { - SPDK_ERRLOG("Error - bdesc_data_len (%d) > data_len (%d)", - bdesc_data_len, data_len); + if (desc_count < 0) { spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, SPDK_SCSI_SENSE_ILLEGAL_REQUEST, SPDK_SCSI_ASC_INVALID_FIELD_IN_CDB, SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + free(ctx); return SPDK_SCSI_TASK_COMPLETE; } - rc = spdk_bdev_unmap(task->desc, task->ch, desc, - bdesc_count, spdk_bdev_scsi_task_complete_cmd, - task); + /* Before we submit commands, set the status to success */ + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_GOOD, + SPDK_SCSI_SENSE_NO_SENSE, + SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - if (rc) { - SPDK_ERRLOG("SCSI Unmapping failed\n"); - spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, - SPDK_SCSI_SENSE_NO_SENSE, - SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE, - SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + for (i = 0; i < desc_count; i++) { + struct spdk_scsi_unmap_bdesc *desc; + uint64_t block_size; + uint64_t offset; + uint64_t nbytes; + + desc = &ctx->desc[i]; + + block_size = spdk_bdev_get_block_size(bdev); + offset = from_be64(&desc->lba) * block_size; + nbytes = from_be32(&desc->block_count) * block_size; + + if (nbytes == 0) { + continue; + } + + ctx->count++; + rc = spdk_bdev_unmap(task->desc, task->ch, offset, nbytes, + spdk_bdev_scsi_task_complete_unmap_cmd, ctx); + + if (rc) { + SPDK_ERRLOG("SCSI Unmapping failed\n"); + spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION, + SPDK_SCSI_SENSE_NO_SENSE, + SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE, + SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + ctx->count--; + /* We can't complete here - we may have to wait for previously + * submitted unmaps to complete */ + break; + } + } + + if (ctx->count == 0) { + free(ctx); return SPDK_SCSI_TASK_COMPLETE; } diff --git a/lib/scsi/scsi_internal.h b/lib/scsi/scsi_internal.h index 1d1049d56..5811da542 100644 --- a/lib/scsi/scsi_internal.h +++ b/lib/scsi/scsi_internal.h @@ -165,7 +165,6 @@ int spdk_bdev_scsi_reset(struct spdk_bdev *bdev, struct spdk_scsi_task *task); struct spdk_scsi_parameters { uint32_t max_unmap_lba_count; - uint32_t max_unmap_block_descriptor_count; uint32_t optimal_unmap_granularity; uint32_t unmap_granularity_alignment; uint32_t ugavalid; diff --git a/test/lib/bdev/bdevperf/bdevperf.c b/test/lib/bdev/bdevperf/bdevperf.c index fe2eaa6cc..854b807e8 100644 --- a/test/lib/bdev/bdevperf/bdevperf.c +++ b/test/lib/bdev/bdevperf/bdevperf.c @@ -52,7 +52,6 @@ struct bdevperf_task { struct io_target *target; void *buf; uint64_t offset; - struct spdk_scsi_unmap_bdesc bdesc; }; static int g_io_size = 0; @@ -276,14 +275,8 @@ bdevperf_verify_write_complete(struct spdk_bdev_io *bdev_io, bool success, target = task->target; if (g_unmap) { - uint32_t block_size = spdk_bdev_get_block_size(target->bdev); - - /* Unmap the data */ - to_be64(&task->bdesc.lba, task->offset / block_size); - to_be32(&task->bdesc.block_count, g_io_size / block_size); - - rc = spdk_bdev_unmap(target->bdev_desc, target->ch, &task->bdesc, 1, bdevperf_unmap_complete, - task); + rc = spdk_bdev_unmap(target->bdev_desc, target->ch, task->offset, g_io_size, + bdevperf_unmap_complete, task); if (rc) { printf("Failed to submit unmap: %d\n", rc); target->is_draining = true; diff --git a/test/unit/lib/nvmf/ctrlr_bdev.c/ctrlr_bdev_ut.c b/test/unit/lib/nvmf/ctrlr_bdev.c/ctrlr_bdev_ut.c index 6ab2c10d4..8146d30f1 100644 --- a/test/unit/lib/nvmf/ctrlr_bdev.c/ctrlr_bdev_ut.c +++ b/test/unit/lib/nvmf/ctrlr_bdev.c/ctrlr_bdev_ut.c @@ -151,7 +151,7 @@ spdk_bdev_flush(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, int spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, - struct spdk_scsi_unmap_bdesc *unmap_d, uint16_t bdesc_count, spdk_bdev_io_completion_cb cb, + uint64_t offset, uint64_t length, spdk_bdev_io_completion_cb cb, void *cb_arg) { return 0; diff --git a/test/unit/lib/scsi/scsi.c/scsi_ut.c b/test/unit/lib/scsi/scsi.c/scsi_ut.c index e448ba2e1..581ec78a0 100644 --- a/test/unit/lib/scsi/scsi.c/scsi_ut.c +++ b/test/unit/lib/scsi/scsi.c/scsi_ut.c @@ -89,7 +89,6 @@ set_default_scsi_params(struct spdk_scsi_parameters *params) { memset(params, 0, sizeof(*params)); params->max_unmap_lba_count = DEFAULT_MAX_UNMAP_LBA_COUNT; - params->max_unmap_block_descriptor_count = DEFAULT_MAX_UNMAP_BLOCK_DESCRIPTOR_COUNT; params->optimal_unmap_granularity = DEFAULT_OPTIMAL_UNMAP_GRANULARITY; params->unmap_granularity_alignment = DEFAULT_UNMAP_GRANULARITY_ALIGNMENT; params->ugavalid = DEFAULT_UGAVALID; @@ -139,29 +138,6 @@ scsi_init_set_max_unmap_lba_count_config_param(void) spdk_conf_free(config); } -static void -scsi_init_set_max_unmap_block_descriptor_count_config_param(void) -{ - struct spdk_scsi_parameters params; - struct spdk_conf *config; - int rc; - - /* set scsi_params.max_unmap_block_descriptor_count = 1 - * of Scsi section */ - config = spdk_config_init_scsi_params("MaxUnmapBlockDescriptorCount", "1"); - spdk_conf_set_as_default(config); - rc = spdk_scsi_init(); - CU_ASSERT_EQUAL(rc, 0); - - /* Assert the scsi_params.max_unmap_block_descriptor_count == 1 and - * assert the rest of the params are set to their default values */ - set_default_scsi_params(¶ms); - params.max_unmap_block_descriptor_count = 1; - CU_ASSERT(memcmp(&g_spdk_scsi.scsi_params, ¶ms, sizeof(params)) == 0); - - spdk_conf_free(config); -} - static void scsi_init_set_optimal_unmap_granularity_config_param(void) { @@ -366,8 +342,6 @@ main(int argc, char **argv) scsi_init_sp_null) == NULL || CU_add_test(suite, "scsi init - set max_unmap_lba_count", \ scsi_init_set_max_unmap_lba_count_config_param) == NULL - || CU_add_test(suite, "scsi init - set max_unmap_block_descriptor_count", \ - scsi_init_set_max_unmap_block_descriptor_count_config_param) == NULL || CU_add_test(suite, "scsi init - set optimal_unmap_granularity", \ scsi_init_set_optimal_unmap_granularity_config_param) == NULL || CU_add_test(suite, "scsi init - set unmap_granularity_alignment", \ diff --git a/test/unit/lib/scsi/scsi_bdev.c/scsi_bdev_ut.c b/test/unit/lib/scsi/scsi_bdev.c/scsi_bdev_ut.c index 51a5405e4..d342efb29 100644 --- a/test/unit/lib/scsi/scsi_bdev.c/scsi_bdev_ut.c +++ b/test/unit/lib/scsi/scsi_bdev.c/scsi_bdev_ut.c @@ -108,12 +108,6 @@ spdk_bdev_get_product_name(const struct spdk_bdev *bdev) return "test product"; } -uint32_t -spdk_bdev_get_max_unmap_descriptors(const struct spdk_bdev *bdev) -{ - return 1; -} - bool spdk_bdev_has_write_cache(const struct spdk_bdev *bdev) { @@ -215,8 +209,7 @@ spdk_bdev_writev(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, int spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, - struct spdk_scsi_unmap_bdesc *unmap_d, - uint16_t bdesc_count, + uint64_t offset, uint64_t length, spdk_bdev_io_completion_cb cb, void *cb_arg) { return 0;