Spdk/lib/bdev/gpt/vbdev_gpt.c

/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * This driver reads a GPT partition table from a bdev and exposes a virtual block device for
 * each partition.
 */

#include "spdk/stdinc.h"

#include "spdk/rpc.h"
#include "spdk/conf.h"
#include "spdk/endian.h"
#include "spdk/io_channel.h"
#include "spdk/env.h"
#include "spdk/string.h"

#include "spdk_internal/bdev.h"
#include "spdk_internal/log.h"

#include "gpt.h"

SPDK_DECLARE_BDEV_MODULE(gpt);

/* Base block device gpt context */
struct spdk_gpt_bdev {
	struct spdk_bdev_desc *bdev_desc;
	struct spdk_bdev *bdev;
	struct spdk_gpt gpt;
	struct spdk_io_channel *ch;
	uint32_t ref;
};

/* Context for each gpt virtual bdev */
struct gpt_partition_disk {
	struct spdk_bdev	disk;
	struct spdk_bdev	*base_bdev;
	uint32_t		partition_index;
	struct spdk_gpt_bdev	*gpt_base;
	uint64_t		offset_blocks;
	uint64_t		offset_bytes;
	TAILQ_ENTRY(gpt_partition_disk)	tailq;
};

static TAILQ_HEAD(, gpt_partition_disk) g_gpt_partition_disks = TAILQ_HEAD_INITIALIZER(
			g_gpt_partition_disks);
static TAILQ_HEAD(, spdk_bdev) g_bdevs = TAILQ_HEAD_INITIALIZER(g_bdevs);

static int g_gpt_base_num;
static bool g_gpt_init_done;
static bool g_gpt_disabled;

static void
spdk_gpt_bdev_free(struct spdk_gpt_bdev *gpt_bdev)
{
	if (!gpt_bdev) {
		return;
	}

	if (gpt_bdev->ch) {
		spdk_put_io_channel(gpt_bdev->ch);
	}

	spdk_dma_free(gpt_bdev->gpt.buf);
	free(gpt_bdev);
}

static struct spdk_gpt_bdev *
spdk_gpt_base_bdev_init(struct spdk_bdev *bdev)
{
	struct spdk_gpt_bdev *gpt_bdev;
	struct spdk_gpt *gpt;
	int rc;

	gpt_bdev = calloc(1, sizeof(*gpt_bdev));
	if (!gpt_bdev) {
		SPDK_ERRLOG("Cannot alloc memory for gpt_bdev pointer\n");
		return NULL;
	}

	gpt_bdev->bdev = bdev;
	gpt_bdev->ref = 0;

	gpt = &gpt_bdev->gpt;
	gpt->buf = spdk_dma_zmalloc(SPDK_GPT_BUFFER_SIZE, 0x1000, NULL);
	if (!gpt->buf) {
		spdk_gpt_bdev_free(gpt_bdev);
		SPDK_ERRLOG("Cannot alloc buf\n");
		return NULL;
	}

	gpt->sector_size = bdev->blocklen;
	gpt->total_sectors = bdev->blockcnt;
	gpt->lba_start = 0;
	gpt->lba_end = gpt->total_sectors - 1;

	rc = spdk_bdev_open(gpt_bdev->bdev, false, NULL, NULL, &gpt_bdev->bdev_desc);
	if (rc != 0) {
		SPDK_ERRLOG("Could not open bdev %s, error=%d\n",
			    spdk_bdev_get_name(gpt_bdev->bdev), rc);
		spdk_gpt_bdev_free(gpt_bdev);
		return NULL;
	}

	gpt_bdev->ch = spdk_bdev_get_io_channel(gpt_bdev->bdev_desc);
	if (!gpt_bdev->ch) {
		SPDK_ERRLOG("Cannot allocate ch\n");
		spdk_gpt_bdev_free(gpt_bdev);
		return NULL;
	}

	return gpt_bdev;

}

static void
gpt_read(struct gpt_partition_disk *gpt_partition_disk, struct spdk_bdev_io *bdev_io)
{
	bdev_io->u.read.offset += gpt_partition_disk->offset_bytes;
}

static void
gpt_write(struct gpt_partition_disk *gpt_partition_disk, struct spdk_bdev_io *bdev_io)
{
	bdev_io->u.write.offset += gpt_partition_disk->offset_bytes;
}

static void
gpt_unmap(struct gpt_partition_disk *gpt_partition_disk, struct spdk_bdev_io *bdev_io)
{
	uint16_t i;
	uint64_t lba;

	for (i = 0; i < bdev_io->u.unmap.bdesc_count; i++) {
		lba = from_be64(&bdev_io->u.unmap.unmap_bdesc[i].lba);
		lba += gpt_partition_disk->offset_blocks;
		to_be64(&bdev_io->u.unmap.unmap_bdesc[i].lba, lba);
	}
}

static void
gpt_flush(struct gpt_partition_disk *gpt_partition_disk, struct spdk_bdev_io *bdev_io)
{
	bdev_io->u.flush.offset += gpt_partition_disk->offset_bytes;
}


static void
_vbdev_gpt_complete_reset(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *gpt_io = cb_arg;
	struct spdk_io_channel *base_ch = *(struct spdk_io_channel **)gpt_io->driver_ctx;

	spdk_put_io_channel(base_ch);
	spdk_bdev_io_complete(gpt_io, success);
	spdk_bdev_free_io(bdev_io);
}

static void
vbdev_gpt_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct gpt_partition_disk *gpt_partition_disk = bdev_io->bdev->ctxt;
	struct spdk_io_channel *base_ch;

	/* Modify the I/O to adjust for the offset within the base bdev. */
	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		gpt_read(gpt_partition_disk, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		gpt_write(gpt_partition_disk, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_UNMAP:
		gpt_unmap(gpt_partition_disk, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_FLUSH:
		gpt_flush(gpt_partition_disk, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_RESET:
		base_ch = spdk_get_io_channel(gpt_partition_disk->base_bdev);
		*(struct spdk_io_channel **)bdev_io->driver_ctx = base_ch;
		spdk_bdev_reset(gpt_partition_disk->base_bdev, base_ch,
				_vbdev_gpt_complete_reset, bdev_io);
		return;
	default:
		SPDK_ERRLOG("gpt: unknown I/O type %d\n", bdev_io->type);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	/* Submit the modified I/O to the underlying bdev. */
	spdk_bdev_io_resubmit(bdev_io, gpt_partition_disk->base_bdev);
}

static void
vbdev_gpt_base_get_ref(struct spdk_gpt_bdev *gpt_base,
		       struct gpt_partition_disk *gpt_partition_disk)
{
	__sync_fetch_and_add(&gpt_base->ref, 1);
	gpt_partition_disk->gpt_base = gpt_base;
}

static void
vbdev_gpt_base_put_ref(struct spdk_gpt_bdev *gpt_base)
{
	if (__sync_sub_and_fetch(&gpt_base->ref, 1) == 0) {
		spdk_gpt_bdev_free(gpt_base);
	}
}

static void
vbdev_gpt_free(struct gpt_partition_disk *gpt_partition_disk)
{
	struct spdk_gpt_bdev *gpt_base;

	if (!gpt_partition_disk) {
		return;
	}

	gpt_base = gpt_partition_disk->gpt_base;

	TAILQ_REMOVE(&g_gpt_partition_disks, gpt_partition_disk, tailq);
	free(gpt_partition_disk->disk.name);
	free(gpt_partition_disk);

	assert(gpt_base != NULL);
	vbdev_gpt_base_put_ref(gpt_base);
}

static int
vbdev_gpt_destruct(void *ctx)
{
	struct gpt_partition_disk *gpt_partition_disk = ctx;

	vbdev_gpt_free(gpt_partition_disk);
	return 0;
}

static bool
vbdev_gpt_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	struct gpt_partition_disk *gpt_partition_disk = ctx;

	return gpt_partition_disk->base_bdev->fn_table->io_type_supported(gpt_partition_disk->base_bdev,
			io_type);
}

static struct spdk_io_channel *
vbdev_gpt_get_io_channel(void *ctx)
{
	struct gpt_partition_disk *gpt_partition_disk = ctx;

	return gpt_partition_disk->base_bdev->fn_table->get_io_channel(gpt_partition_disk->base_bdev);
}

static void
write_guid(struct spdk_json_write_ctx *w, const struct spdk_gpt_guid *guid)
{
	spdk_json_write_string_fmt(w, "%08x-%04x-%04x-%04x-%04x%08x",
				   from_le32(&guid->raw[0]),
				   from_le16(&guid->raw[4]),
				   from_le16(&guid->raw[6]),
				   from_be16(&guid->raw[8]),
				   from_be16(&guid->raw[10]),
				   from_be32(&guid->raw[12]));
}

static int
vbdev_gpt_dump_config_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct gpt_partition_disk *gpt_partition_disk = ctx;
	struct spdk_gpt *gpt = &gpt_partition_disk->gpt_base->gpt;
	struct spdk_gpt_partition_entry *gpt_entry = &gpt->partitions[gpt_partition_disk->partition_index];

	spdk_json_write_name(w, "gpt");
	spdk_json_write_object_begin(w);

	spdk_json_write_name(w, "base_bdev");
	spdk_json_write_string(w, spdk_bdev_get_name(gpt_partition_disk->base_bdev));

	spdk_json_write_name(w, "offset_blocks");
	spdk_json_write_uint64(w, gpt_partition_disk->offset_blocks);

	spdk_json_write_name(w, "partition_type_guid");
	write_guid(w, &gpt_entry->part_type_guid);

	spdk_json_write_name(w, "unique_partition_guid");
	write_guid(w, &gpt_entry->unique_partition_guid);

	spdk_json_write_object_end(w);

	return 0;
}

static struct spdk_bdev_fn_table vbdev_gpt_fn_table = {
	.destruct		= vbdev_gpt_destruct,
	.io_type_supported	= vbdev_gpt_io_type_supported,
	.submit_request		= vbdev_gpt_submit_request,
	.get_io_channel		= vbdev_gpt_get_io_channel,
	.dump_config_json	= vbdev_gpt_dump_config_json,
};

static int
vbdev_gpt_create_bdevs(struct spdk_gpt_bdev *gpt_bdev)
{
	uint32_t num_partition_entries;
	uint64_t i, head_lba_start, head_lba_end;
	struct spdk_gpt_partition_entry *p;
	struct gpt_partition_disk *d;
	struct spdk_bdev *base_bdev = gpt_bdev->bdev;
	struct spdk_gpt *gpt;

	gpt = &gpt_bdev->gpt;
	num_partition_entries = from_le32(&gpt->header->num_partition_entries);
	head_lba_start = from_le64(&gpt->header->first_usable_lba);
	head_lba_end = from_le64(&gpt->header->last_usable_lba);

	for (i = 0; i < num_partition_entries; i++) {
		p = &gpt->partitions[i];
		uint64_t lba_start = from_le64(&p->starting_lba);
		uint64_t lba_end = from_le64(&p->ending_lba);

		if (!SPDK_GPT_GUID_EQUAL(&gpt->partitions[i].part_type_guid,
					 &SPDK_GPT_PART_TYPE_GUID) ||
		    lba_start == 0) {
			continue;
		}
		if (lba_start < head_lba_start || lba_end > head_lba_end) {
			continue;
		}

		d = calloc(1, sizeof(*d));
		if (!d) {
			SPDK_ERRLOG("Memory allocation failure\n");
			return -1;
		}

		/* Copy properties of the base bdev */
		d->disk.blocklen = base_bdev->blocklen;
		d->disk.write_cache = base_bdev->write_cache;
		d->disk.need_aligned_buffer = base_bdev->need_aligned_buffer;
		d->disk.max_unmap_bdesc_count = base_bdev->max_unmap_bdesc_count;

		/* index start at 1 instead of 0 to match the existing style */
		d->disk.name = spdk_sprintf_alloc("%sp%" PRIu64, spdk_bdev_get_name(base_bdev), i + 1);
		if (!d->disk.name) {
			free(d);
			SPDK_ERRLOG("Failed to allocate disk name\n");
			return -1;
		}

		d->partition_index = i;
		d->disk.product_name = "GPT Disk";
		d->base_bdev = base_bdev;
		d->offset_bytes = lba_start * gpt->sector_size;
		d->offset_blocks = lba_start;
		d->disk.blockcnt = lba_end - lba_start;
		d->disk.ctxt = d;
		d->disk.fn_table = &vbdev_gpt_fn_table;
		d->disk.module = SPDK_GET_BDEV_MODULE(gpt);

		SPDK_TRACELOG(SPDK_TRACE_VBDEV_GPT, "gpt vbdev %s: base bdev: %s offset_bytes: "
			      "%" PRIu64 " offset_blocks: %" PRIu64 "\n",
			      d->disk.name, spdk_bdev_get_name(base_bdev), d->offset_bytes, d->offset_blocks);

		vbdev_gpt_base_get_ref(gpt_bdev, d);

		spdk_vbdev_register(&d->disk, &base_bdev, 1);

		TAILQ_INSERT_TAIL(&g_gpt_partition_disks, d, tailq);
	}

	return 0;
}

static void
spdk_gpt_bdev_complete(struct spdk_bdev_io *bdev_io, bool status, void *arg)
{
	struct spdk_gpt_bdev *gpt_bdev = (struct spdk_gpt_bdev *)arg;
	static int bdev_init_num = 0;
	int rc;

	/* free the ch and also close the bdev_desc */
	spdk_put_io_channel(gpt_bdev->ch);
	gpt_bdev->ch = NULL;
	spdk_bdev_close(gpt_bdev->bdev_desc);
	gpt_bdev->bdev_desc = NULL;

	bdev_init_num++;
	if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
		SPDK_ERRLOG("Gpt: bdev=%s io error status=%d\n",
			    spdk_bdev_get_name(gpt_bdev->bdev), status);
		goto end;
	}

	rc = spdk_gpt_parse(&gpt_bdev->gpt);
	if (rc) {
		SPDK_TRACELOG(SPDK_TRACE_VBDEV_GPT, "Failed to parse gpt\n");
		goto end;
	}

	rc = vbdev_gpt_create_bdevs(gpt_bdev);
	if (rc < 0) {
		SPDK_TRACELOG(SPDK_TRACE_VBDEV_GPT, "Failed to split dev=%s by gpt table\n",
			      spdk_bdev_get_name(gpt_bdev->bdev));
	}

end:
	spdk_bdev_free_io(bdev_io);
	if (gpt_bdev->ref == 0) {
		/* If no gpt_partition_disk instances were created, free the base context */
		spdk_gpt_bdev_free(gpt_bdev);
	}

	if (!g_gpt_init_done) {
		/* Call next vbdev module init after the last gpt creation */
		if (bdev_init_num == g_gpt_base_num) {
			g_gpt_init_done = true;
			spdk_vbdev_module_init_next(0);
		}
	}
}

static int
vbdev_gpt_read_gpt(struct spdk_bdev *bdev)
{
	struct spdk_gpt_bdev *gpt_bdev;
	int rc;

	gpt_bdev = spdk_gpt_base_bdev_init(bdev);
	if (!gpt_bdev) {
		SPDK_ERRLOG("Cannot allocated gpt_bdev\n");
		return -1;
	}

	rc = spdk_bdev_read(gpt_bdev->bdev, gpt_bdev->ch, gpt_bdev->gpt.buf, 0, SPDK_GPT_BUFFER_SIZE,
			    spdk_gpt_bdev_complete, gpt_bdev);
	if (rc < 0) {
		spdk_gpt_bdev_free(gpt_bdev);
		SPDK_ERRLOG("Failed to send bdev_io command\n");
		return -1;
	}

	return 0;
}

static void
vbdev_gpt_init(void)
{
	struct spdk_bdev *base_bdev, *tmp;
	int rc = 0;
	struct spdk_conf_section *sp = spdk_conf_find_section(NULL, "Gpt");

	if (sp && spdk_conf_section_get_boolval(sp, "Disable", false)) {
		/* Disable Gpt probe */
		g_gpt_disabled = true;
		goto end;
	}

	TAILQ_FOREACH_SAFE(base_bdev, &g_bdevs, link, tmp) {
		TAILQ_REMOVE(&g_bdevs, base_bdev, link);
		rc = vbdev_gpt_read_gpt(base_bdev);
		if (rc) {
			SPDK_ERRLOG("Failed to read info from bdev %s\n",
				    spdk_bdev_get_name(base_bdev));
			continue;
		}
		g_gpt_base_num++;

	}

	if (!g_gpt_base_num) {
		g_gpt_init_done = true;
	}
end:
	/* if no gpt bdev num is counted, just call vbdev_module_init_next */
	if (!g_gpt_base_num) {
		spdk_vbdev_module_init_next(rc);
	}
}

static void
vbdev_gpt_fini(void)
{
	struct gpt_partition_disk *gpt_partition_disk, *tmp;

	TAILQ_FOREACH_SAFE(gpt_partition_disk, &g_gpt_partition_disks, tailq, tmp) {
		vbdev_gpt_free(gpt_partition_disk);
	}
}

static void
vbdev_gpt_examine(struct spdk_bdev *bdev)
{
	/*
	 * TODO: this will get fixed in a later patch which refactors
	 * the GPT init/register code.  For now, nothing is operating
	 * on the examination counts so just immediately indicate
	 * that the examination is done.
	 */
	spdk_vbdev_module_examine_done(SPDK_GET_BDEV_MODULE(gpt));
	if (g_gpt_disabled) {
		return;
	}

	if (!g_gpt_init_done) {
		TAILQ_INSERT_TAIL(&g_bdevs, bdev, link);
	}
}

static int
vbdev_gpt_get_ctx_size(void)
{
	/*
	 * Note: this context is only used for RESET operations, since it is the only
	 *  I/O type that does not just resubmit to the base bdev.
	 */
	return sizeof(struct spdk_io_channel *);
}

SPDK_VBDEV_MODULE_REGISTER(gpt, vbdev_gpt_init, vbdev_gpt_fini, NULL,
			   vbdev_gpt_get_ctx_size, vbdev_gpt_examine)
SPDK_LOG_REGISTER_TRACE_FLAG("vbdev_gpt", SPDK_TRACE_VBDEV_GPT)