diff --git a/lib/bdev/Makefile b/lib/bdev/Makefile
index 051a5c347..7f949ea28 100644
--- a/lib/bdev/Makefile
+++ b/lib/bdev/Makefile
@@ -44,7 +44,7 @@ C_SRCS-$(CONFIG_VTUNE) += vtune.c
 
 LIBNAME = bdev
 
-DIRS-y += error malloc null nvme rpc split
+DIRS-y += error gpt malloc null nvme rpc split
 
 ifeq ($(OS),Linux)
 DIRS-y += aio
diff --git a/lib/bdev/gpt/Makefile b/lib/bdev/gpt/Makefile
new file mode 100644
index 000000000..10aa78af1
--- /dev/null
+++ b/lib/bdev/gpt/Makefile
@@ -0,0 +1,41 @@
+#
+#  BSD LICENSE
+#
+#  Copyright (c) Intel Corporation.
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions
+#  are met:
+#
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in
+#      the documentation and/or other materials provided with the
+#      distribution.
+#    * Neither the name of Intel Corporation nor the names of its
+#      contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+CFLAGS += $(ENV_CFLAGS) -I$(SPDK_ROOT_DIR)/lib/bdev/
+C_SRCS = gpt.c vbdev_gpt.c
+LIBNAME = vbdev_gpt
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/lib/bdev/gpt/gpt.c b/lib/bdev/gpt/gpt.c
new file mode 100644
index 000000000..66b929e9c
--- /dev/null
+++ b/lib/bdev/gpt/gpt.c
@@ -0,0 +1,322 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk_internal/event.h"
+#include "spdk_internal/bdev.h"
+
+#include "gpt.h"
+
+#include "spdk/event.h"
+#include "spdk/endian.h"
+#include "spdk/env.h"
+#include "spdk/log.h"
+#include "spdk/io_channel.h"
+
+#define GPT_PRIMARY_PARTITION_TABLE_LBA 0x1
+#define PRIMARY_PARTITION_NUMBER 4
+#define GPT_PROTECTIVE_MBR 1
+#define SPDK_MAX_NUM_PARTITION_ENTRIES 128
+/* NOTE(review): named CRC32C, but 0xedb88320 looks like the reflected CRC-32 polynomial - confirm. */
+#define SPDK_GPT_CRC32C_POLYNOMIAL_REFLECT 0xedb88320UL
+
+/* Byte-indexed lookup table, filled by the constructor spdk_gpt_init_crc32(). */
+static uint32_t spdk_gpt_crc32_table[256];
+
+/* Fill spdk_gpt_crc32_table, deriving each entry from its index one bit at a time. */
+__attribute__((constructor)) static void
+spdk_gpt_init_crc32(void)
+{
+	int i, j;
+	uint32_t val;
+
+	for (i = 0; i < 256; i++) {
+		val = i;
+		for (j = 0; j < 8; j++) {
+			if (val & 1) {
+				val = (val >> 1) ^ SPDK_GPT_CRC32C_POLYNOMIAL_REFLECT;
+			} else {
+				val = (val >> 1);
+			}
+		}
+		spdk_gpt_crc32_table[i] = val;
+	}
+}
+
+/*
+ * Table-driven CRC of buf[0..size). The running value starts at seed and the
+ * result is XORed with seed again; the callers in this file pass ~0.
+ */
+static uint32_t
+spdk_gpt_crc32(const uint8_t *buf, uint32_t size, uint32_t seed)
+{
+	uint32_t i, crc32 = seed;
+
+	for (i = 0; i < size; i++) {
+		crc32 = spdk_gpt_crc32_table[(crc32 ^ buf[i]) & 0xff] ^ (crc32 >> 8);
+	}
+
+	return crc32 ^ seed;
+}
+
+/*
+ * Locate the partition entry array inside gpt->buf from the fields of gpt->header
+ * and verify its CRC. On success gpt->partitions points into gpt->buf.
+ * Returns 0 on success, -1 if any check fails.
+ */
+static int
+spdk_gpt_read_partitions(struct spdk_gpt *gpt)
+{
+	uint32_t total_partition_size, num_partition_entries, partition_entry_size;
+	uint64_t partition_start_lba;
+	struct spdk_gpt_header *head = gpt->header;
+	uint32_t crc32;
+
+	num_partition_entries = from_le32(&head->num_partition_entries);
+	if (num_partition_entries > SPDK_MAX_NUM_PARTITION_ENTRIES) {
+		SPDK_ERRLOG("Num_partition_entries=%u which exceeds max=%u\n",
+			    num_partition_entries, SPDK_MAX_NUM_PARTITION_ENTRIES);
+		return -1;
+	}
+
+	partition_entry_size = from_le32(&head->size_of_partition_entry);
+	if (partition_entry_size != sizeof(struct spdk_gpt_partition_entry)) {
+		SPDK_ERRLOG("Partition_entry_size(%x) != expected(%zx)\n",
+			    partition_entry_size, sizeof(struct spdk_gpt_partition_entry));
+		return -1;
+	}
+
+	total_partition_size = num_partition_entries * partition_entry_size;
+	partition_start_lba = from_le64(&head->partition_entry_lba);
+	/*
+	 * partition_entry_lba is read from the disk. Compare it against a quotient
+	 * rather than multiplying first, so that a huge value cannot wrap the 64-bit
+	 * byte offset and slip past the buffer bound.
+	 */
+	if (total_partition_size > SPDK_GPT_BUFFER_SIZE || gpt->sector_size == 0 ||
+	    partition_start_lba > (SPDK_GPT_BUFFER_SIZE - total_partition_size) / gpt->sector_size) {
+		SPDK_ERRLOG("Buffer size is not enough\n");
+		return -1;
+	}
+
+	gpt->partitions = (struct spdk_gpt_partition_entry *)(gpt->buf +
+			  partition_start_lba * gpt->sector_size);
+
+	crc32 = spdk_gpt_crc32((uint8_t *)gpt->partitions, total_partition_size, ~0);
+
+	if (crc32 != from_le32(&head->partition_entry_array_crc32)) {
+		SPDK_ERRLOG("GPT partition entry array crc32 did not match\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Check the usable LBA window advertised by the header: it must be ordered, must
+ * end no later than lba_end, and must not contain the header's own LBA.
+ * Returns 0 if the window is acceptable, -1 otherwise.
+ */
+static int
+spdk_gpt_lba_range_check(struct spdk_gpt_header *head, uint64_t lba_end)
+{
+	uint64_t usable_lba_start, usable_lba_end;
+
+	usable_lba_start = from_le64(&head->first_usable_lba);
+	usable_lba_end = from_le64(&head->last_usable_lba);
+
+	if (usable_lba_end < usable_lba_start) {
+		SPDK_ERRLOG("Head's usable_lba_end(%" PRIu64 ") < usable_lba_start(%" PRIu64 ")\n",
+			    usable_lba_end, usable_lba_start);
+		return -1;
+	}
+
+	if (usable_lba_end > lba_end) {
+		SPDK_ERRLOG("Head's usable_lba_end(%" PRIu64 ") > lba_end(%" PRIu64 ")\n",
+			    usable_lba_end, lba_end);
+		return -1;
+	}
+
+	/* Both bounds are valid LBAs of the window, so the comparison has to include them. */
+	if ((usable_lba_start <= GPT_PRIMARY_PARTITION_TABLE_LBA) &&
+	    (GPT_PRIMARY_PARTITION_TABLE_LBA <= usable_lba_end)) {
+		SPDK_ERRLOG("Head lba is in the usable range\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Validate the GPT header found at LBA GPT_PRIMARY_PARTITION_TABLE_LBA of
+ * gpt->buf: size, CRC, signature and usable LBA window. On success gpt->header
+ * points into gpt->buf. Returns 0 on success, -1 if any check fails.
+ */
+static int
+spdk_gpt_read_header(struct spdk_gpt *gpt)
+{
+	uint32_t head_size;
+	uint32_t new_crc, original_crc;
+	uint64_t head_offset;
+	struct spdk_gpt_header *head;
+
+	/* With a large sector size the header can lie beyond the end of the buffer. */
+	head_offset = (uint64_t)GPT_PRIMARY_PARTITION_TABLE_LBA * gpt->sector_size;
+	if (head_offset + sizeof(*head) > SPDK_GPT_BUFFER_SIZE) {
+		SPDK_ERRLOG("Buffer size is not enough\n");
+		return -1;
+	}
+
+	head = (struct spdk_gpt_header *)(gpt->buf + head_offset);
+	head_size = from_le32(&head->header_size);
+	if (head_size < sizeof(*head) || head_size > gpt->sector_size ||
+	    head_offset + head_size > SPDK_GPT_BUFFER_SIZE) {
+		SPDK_ERRLOG("head_size=%u\n", head_size);
+		return -1;
+	}
+
+	/* The CRC is computed with the header_crc32 field itself set to zero. */
+	original_crc = from_le32(&head->header_crc32);
+	head->header_crc32 = 0;
+	new_crc = spdk_gpt_crc32((uint8_t *)head, head_size, ~0);
+	/* restore header crc32 */
+	to_le32(&head->header_crc32, original_crc);
+
+	if (new_crc != original_crc) {
+		SPDK_ERRLOG("head crc32 does not match, provided=%u, caculated=%u\n",
+			    original_crc, new_crc);
+		return -1;
+	}
+
+	if (memcmp(SPDK_GPT_SIGNATURE, head->gpt_signature,
+		   sizeof(head->gpt_signature))) {
+		SPDK_ERRLOG("signature did not match\n");
+		return -1;
+	}
+
+	if (spdk_gpt_lba_range_check(head, gpt->lba_end)) {
+		SPDK_ERRLOG("lba range check error\n");
+		return -1;
+	}
+
+	gpt->header = head;
+	return 0;
+}
+
+/*
+ * Validate the MBR at the start of gpt->buf: signature, first entry starting at
+ * the GPT header LBA, and a GPT protective entry of the expected size.
+ * On success gpt->mbr points into gpt->buf. Returns 0 on success, -1 otherwise.
+ */
+static int
+spdk_gpt_check_mbr(struct spdk_gpt *gpt)
+{
+	int i, primary_partition = -1;
+	uint32_t total_lba_size, start_lba;
+	struct spdk_mbr *mbr;
+
+	mbr = (struct spdk_mbr *)gpt->buf;
+	if (from_le16(&mbr->mbr_signature) != SPDK_MBR_SIGNATURE) {
+		SPDK_ERRLOG("Signature mismatch, provided=%x, expected=%x\n", from_le16(&mbr->mbr_signature),
+			    SPDK_MBR_SIGNATURE);
+		return -1;
+	}
+
+	/* Convert to host order once so that the check and the log agree on any host. */
+	start_lba = from_le32(&mbr->partitions[0].start_lba);
+	if (start_lba != GPT_PRIMARY_PARTITION_TABLE_LBA) {
+		SPDK_ERRLOG("start lba mismatch, provided=%u, expected=%u\n",
+			    start_lba, GPT_PRIMARY_PARTITION_TABLE_LBA);
+		return -1;
+	}
+
+	for (i = 0; i < PRIMARY_PARTITION_NUMBER; i++) {
+		if (mbr->partitions[i].os_type == SPDK_MBR_OS_TYPE_GPT_PROTECTIVE) {
+			primary_partition = i;
+			break;
+		}
+	}
+
+	if (primary_partition < 0) {
+		SPDK_ERRLOG("Currently only support GPT Protective MBR format\n");
+		return -1;
+	}
+
+	/* Accept either the exact size (all sectors but the first) or 0xFFFFFFFF. */
+	total_lba_size = from_le32(&mbr->partitions[primary_partition].size_lba);
+	if ((total_lba_size != ((uint32_t) gpt->total_sectors - 1)) &&
+	    (total_lba_size != 0xFFFFFFFF)) {
+		SPDK_ERRLOG("GPT Primary MBR size does not equal: (record_size %u != actual_size %u)!\n",
+			    total_lba_size, (uint32_t) gpt->total_sectors - 1);
+		return -1;
+	}
+
+	gpt->mbr = mbr;
+	return 0;
+}
+
+/*
+ * Parse the disk image already read into gpt->buf: the protective MBR, then
+ * the GPT header, then the partition entry array. On success gpt->mbr,
+ * gpt->header and gpt->partitions point into gpt->buf.
+ * Returns 0 on success, or the non-zero result of the first step that fails.
+ */
+int
+spdk_gpt_parse(struct spdk_gpt *gpt)
+{
+	int rc;
+
+	if (!gpt || !gpt->buf) {
+		SPDK_ERRLOG("Gpt and the related buffer should not be NULL\n");
+		return -1;
+	}
+
+	rc = spdk_gpt_check_mbr(gpt);
+	if (rc) {
+		SPDK_ERRLOG("Failed to check mbr_info\n");
+		return rc;
+	}
+
+	rc = spdk_gpt_read_header(gpt);
+	if (rc) {
+		SPDK_ERRLOG("Failed to read gpt header\n");
+		return rc;
+	}
+
+	rc = spdk_gpt_read_partitions(gpt);
+	if (rc) {
+		SPDK_ERRLOG("Failed to read gpt partitions\n");
+		return rc;
+	}
+
+	return 0;
+}
diff --git a/lib/bdev/gpt/gpt.h b/lib/bdev/gpt/gpt.h
new file mode 100644
index 000000000..54aa35dfa
--- /dev/null
+++ b/lib/bdev/gpt/gpt.h
@@ -0,0 +1,61 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * GPT internal Interface
+ */
+
+#ifndef SPDK_INTERNAL_GPT_H
+#define SPDK_INTERNAL_GPT_H
+
+#include "spdk/gpt_spec.h"
+#include "spdk/bdev.h"
+
+#define SPDK_GPT_PART_TYPE_GUID SPDK_GPT_GUID(0x7c5222bd, 0x8f5d, 0x4087, 0x9c00, 0xbf9843c7b58c)
+#define SPDK_GPT_BUFFER_SIZE 32768 /* 32KB */
+#define SPDK_GPT_UUID_EQUAL(x,y) (memcmp(x, y, sizeof(struct spdk_gpt_guid)) == 0)
+
+struct spdk_gpt {
+	unsigned char *buf;
+	uint64_t lba_start;
+	uint64_t lba_end;
+	uint64_t total_sectors;
+	uint32_t sector_size;
+	struct spdk_mbr *mbr;
+	struct spdk_gpt_header *header;
+	struct spdk_gpt_partition_entry *partitions;
+};
+
+int spdk_gpt_parse(struct spdk_gpt *gpt);
+
+#endif /* SPDK_INTERNAL_GPT_H */
diff --git a/lib/bdev/gpt/vbdev_gpt.c b/lib/bdev/gpt/vbdev_gpt.c
new file mode 100644
index 000000000..c603b40c8
--- /dev/null
+++ b/lib/bdev/gpt/vbdev_gpt.c
@@ -0,0 +1,595 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This driver reads a GPT partition table from a bdev and exposes a virtual block device for
+ * each partition.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "spdk/rpc.h"
+#include "spdk/conf.h"
+#include "spdk/endian.h"
+#include "spdk/io_channel.h"
+#include "spdk/env.h"
+#include "spdk/string.h"
+
+#include "spdk_internal/bdev.h"
+#include "spdk_internal/log.h"
+
+#include "gpt.h"
+
+/* Base block device gpt context */
+struct spdk_gpt_bdev {
+	struct spdk_bdev_desc *bdev_desc;
+	struct spdk_bdev *bdev;
+	struct spdk_gpt gpt;
+	struct spdk_io_channel *ch;
+	uint32_t ref;
+};
+
+/* Context for each gpt virtual bdev */
+struct gpt_partition_disk {
+	struct spdk_bdev disk;
+	struct spdk_bdev *base_bdev;
+	struct spdk_gpt_bdev *gpt_base;
+	uint64_t offset_blocks;
+	uint64_t offset_bytes;
+	TAILQ_ENTRY(gpt_partition_disk) tailq;
+};
+
+static TAILQ_HEAD(, gpt_partition_disk) g_gpt_partition_disks = TAILQ_HEAD_INITIALIZER(
+			g_gpt_partition_disks);
+static TAILQ_HEAD(, spdk_bdev) g_bdevs = TAILQ_HEAD_INITIALIZER(g_bdevs);
+
+static int g_gpt_base_num;
+static bool g_gpt_init_done;
+static bool g_gpt_disabled;
+
+/*
+ * Release everything a base context owns: its I/O channel, its bdev descriptor
+ * (if still open), the DMA buffer and the context itself. Does nothing when
+ * gpt_bdev is NULL.
+ */
+static void
+spdk_gpt_bdev_free(struct spdk_gpt_bdev *gpt_bdev)
+{
+	if (!gpt_bdev) {
+		return;
+	}
+
+	if (gpt_bdev->ch) {
+		spdk_put_io_channel(gpt_bdev->ch);
+	}
+
+	/* Error paths get here with the descriptor still open; close it rather than leak it. */
+	if (gpt_bdev->bdev_desc) {
+		spdk_bdev_close(gpt_bdev->bdev_desc);
+	}
+
+	spdk_dma_free(gpt_bdev->gpt.buf);
+	free(gpt_bdev);
+}
+
+/*
+ * Allocate a base context for bdev together with a zeroed DMA buffer of
+ * SPDK_GPT_BUFFER_SIZE bytes, an open bdev descriptor and an I/O channel.
+ * Returns NULL on failure, after releasing whatever had been acquired.
+ */
+static struct spdk_gpt_bdev *
+spdk_gpt_base_bdev_init(struct spdk_bdev *bdev)
+{
+	struct spdk_gpt_bdev *gpt_bdev;
+	struct spdk_gpt *gpt;
+	int rc;
+
+	gpt_bdev = calloc(1, sizeof(*gpt_bdev));
+	if (!gpt_bdev) {
+		SPDK_ERRLOG("Cannot alloc memory for gpt_bdev pointer\n");
+		return NULL;
+	}
+
+	gpt_bdev->bdev = bdev;
+	gpt_bdev->ref = 0;
+
+	gpt = &gpt_bdev->gpt;
+	gpt->buf = spdk_dma_zmalloc(SPDK_GPT_BUFFER_SIZE, 0x1000, NULL);
+	if (!gpt->buf) {
+		spdk_gpt_bdev_free(gpt_bdev);
+		SPDK_ERRLOG("Cannot alloc buf\n");
+		return NULL;
+	}
+
+	gpt->sector_size = bdev->blocklen;
+	gpt->total_sectors = bdev->blockcnt;
+	gpt->lba_start = 0;
+	gpt->lba_end = gpt->total_sectors - 1;
+
+	rc = spdk_bdev_open(gpt_bdev->bdev, false, NULL, NULL, &gpt_bdev->bdev_desc);
+	if (rc != 0) {
+		SPDK_ERRLOG("Could not open bdev %s, error=%d\n",
+			    spdk_bdev_get_name(gpt_bdev->bdev), rc);
+		/* Make sure the free path does not close a descriptor that was never opened. */
+		gpt_bdev->bdev_desc = NULL;
+		spdk_gpt_bdev_free(gpt_bdev);
+		return NULL;
+	}
+
+	gpt_bdev->ch = spdk_bdev_get_io_channel(gpt_bdev->bdev_desc);
+	if (!gpt_bdev->ch) {
+		SPDK_ERRLOG("Cannot allocate ch\n");
+		spdk_gpt_bdev_free(gpt_bdev);
+		return NULL;
+	}
+
+	return gpt_bdev;
+}
+
+/* The gpt_* helpers below rebase an I/O from the partition onto the base bdev. */
+static void
+gpt_read(struct gpt_partition_disk *gpt_partition_disk, struct spdk_bdev_io *bdev_io)
+{
+	bdev_io->u.read.offset += gpt_partition_disk->offset_bytes;
+}
+
+static void
+gpt_write(struct gpt_partition_disk *gpt_partition_disk, struct spdk_bdev_io *bdev_io)
+{
+	bdev_io->u.write.offset += gpt_partition_disk->offset_bytes;
+}
+
+/* The unmap descriptors hold big-endian LBAs, hence the conversions around the addition. */
+static void
+gpt_unmap(struct gpt_partition_disk *gpt_partition_disk, struct spdk_bdev_io *bdev_io)
+{
+	uint16_t i;
+	uint64_t lba;
+
+	for (i = 0; i < bdev_io->u.unmap.bdesc_count; i++) {
+		lba = from_be64(&bdev_io->u.unmap.unmap_bdesc[i].lba);
+		lba += gpt_partition_disk->offset_blocks;
+		to_be64(&bdev_io->u.unmap.unmap_bdesc[i].lba, lba);
+	}
+}
+
+static void
+gpt_flush(struct gpt_partition_disk *gpt_partition_disk, struct spdk_bdev_io *bdev_io)
+{
+	bdev_io->u.flush.offset += gpt_partition_disk->offset_bytes;
+}
+
+/*
+ * Completion callback of the reset forwarded to the base bdev: release the base
+ * channel stashed in the partition I/O's driver_ctx, then complete that I/O.
+ */
+static void
+_vbdev_gpt_complete_reset(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+	struct spdk_bdev_io *gpt_io = cb_arg;
+	struct spdk_io_channel *base_ch = *(struct spdk_io_channel **)gpt_io->driver_ctx;
+	/* spdk_bdev_io_complete() is given a status code elsewhere in this file, not a bool. */
+	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
+
+	spdk_put_io_channel(base_ch);
+	spdk_bdev_io_complete(gpt_io, status);
+	spdk_bdev_free_io(bdev_io);
+}
+
+/*
+ * I/O entry point of a partition bdev. Offsets are rebased in place and the I/O is
+ * resubmitted to the base bdev; a RESET is forwarded as a separate request.
+ */
+static void
+vbdev_gpt_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+	struct gpt_partition_disk *gpt_partition_disk = bdev_io->bdev->ctxt;
+	struct spdk_io_channel *base_ch;
+
+	/* Modify the I/O to adjust for the offset within the base bdev. */
+	switch (bdev_io->type) {
+	case SPDK_BDEV_IO_TYPE_READ:
+		gpt_read(gpt_partition_disk, bdev_io);
+		break;
+	case SPDK_BDEV_IO_TYPE_WRITE:
+		gpt_write(gpt_partition_disk, bdev_io);
+		break;
+	case SPDK_BDEV_IO_TYPE_UNMAP:
+		gpt_unmap(gpt_partition_disk, bdev_io);
+		break;
+	case SPDK_BDEV_IO_TYPE_FLUSH:
+		gpt_flush(gpt_partition_disk, bdev_io);
+		break;
+	case SPDK_BDEV_IO_TYPE_RESET:
+		base_ch = spdk_get_io_channel(gpt_partition_disk->base_bdev);
+		if (!base_ch) {
+			SPDK_ERRLOG("gpt: cannot get io channel of base bdev for reset\n");
+			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+			return;
+		}
+		/* Remember the channel so that the completion callback can release it. */
+		*(struct spdk_io_channel **)bdev_io->driver_ctx = base_ch;
+		spdk_bdev_reset(gpt_partition_disk->base_bdev, base_ch,
+				_vbdev_gpt_complete_reset, bdev_io);
+		return;
+	default:
+		SPDK_ERRLOG("gpt: unknown I/O type %d\n", bdev_io->type);
+		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+		return;
+	}
+
+	/* Submit the modified I/O to the underlying bdev. */
+	spdk_bdev_io_resubmit(bdev_io, gpt_partition_disk->base_bdev);
+}
+
+/* Take a reference on the base context for a partition disk. Always returns 0. */
+static int
+vbdev_gpt_base_get_ref(struct spdk_gpt_bdev *gpt_base,
+		       struct gpt_partition_disk *gpt_partition_disk)
+{
+	__sync_fetch_and_add(&gpt_base->ref, 1);
+	gpt_partition_disk->gpt_base = gpt_base;
+
+	return 0;
+}
+
+/* Drop one reference; the base context is freed together with the last one. */
+static void
+vbdev_gpt_base_put_ref(struct spdk_gpt_bdev *gpt_base)
+{
+	if (__sync_sub_and_fetch(&gpt_base->ref, 1) == 0) {
+		spdk_gpt_bdev_free(gpt_base);
+	}
+}
+
+/* Unlink and free a partition disk, then drop its reference on the base context. */
+static void
+vbdev_gpt_free(struct gpt_partition_disk *gpt_partition_disk)
+{
+	struct spdk_gpt_bdev *gpt_base;
+
+	if (!gpt_partition_disk) {
+		return;
+	}
+
+	gpt_base = gpt_partition_disk->gpt_base;
+
+	TAILQ_REMOVE(&g_gpt_partition_disks, gpt_partition_disk, tailq);
+	free(gpt_partition_disk->disk.name);
+	free(gpt_partition_disk);
+
+	assert(gpt_base != NULL);
+	vbdev_gpt_base_put_ref(gpt_base);
+}
+
+/* Destruct hook of the function table; ctx is the gpt_partition_disk. */
+static int
+vbdev_gpt_destruct(void *ctx)
+{
+	struct gpt_partition_disk *gpt_partition_disk = ctx;
+
+	vbdev_gpt_free(gpt_partition_disk);
+	return 0;
+}
+
+/* A partition reports whatever its base bdev reports for io_type. */
+static bool
+vbdev_gpt_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
+{
+	struct gpt_partition_disk *gpt_partition_disk = ctx;
+
+	return gpt_partition_disk->base_bdev->fn_table->io_type_supported(gpt_partition_disk->base_bdev,
+			io_type);
+}
+
+/* A partition hands out the I/O channel of its base bdev. */
+static struct spdk_io_channel *
+vbdev_gpt_get_io_channel(void *ctx)
+{
+	struct gpt_partition_disk *gpt_partition_disk = ctx;
+
+	return gpt_partition_disk->base_bdev->fn_table->get_io_channel(gpt_partition_disk->base_bdev);
+}
+
+/* Write a "gpt" JSON object holding the base bdev name and the partition's block offset. */
+static int
+vbdev_gpt_dump_config_json(void *ctx, struct spdk_json_write_ctx *w)
+{
+	struct gpt_partition_disk *gpt_partition_disk = ctx;
+
+	spdk_json_write_name(w, "gpt");
+	spdk_json_write_object_begin(w);
+
+	spdk_json_write_name(w, "base_bdev");
+	spdk_json_write_string(w, spdk_bdev_get_name(gpt_partition_disk->base_bdev));
+	spdk_json_write_name(w, "offset_blocks");
+	spdk_json_write_uint64(w, gpt_partition_disk->offset_blocks);
+
+	spdk_json_write_object_end(w);
+
+	return 0;
+}
+
+static struct spdk_bdev_fn_table vbdev_gpt_fn_table = {
+	.destruct		= vbdev_gpt_destruct,
+	.io_type_supported	= vbdev_gpt_io_type_supported,
+	.submit_request		= vbdev_gpt_submit_request,
+	.get_io_channel		= vbdev_gpt_get_io_channel,
+	.dump_config_json	= vbdev_gpt_dump_config_json,
+};
+
+/*
+ * Register a virtual bdev for every partition entry that carries the SPDK GUID
+ * and lies inside the usable LBA window of the header. Every registered disk
+ * holds a reference on gpt_bdev. Returns 0 on success and -1 if an allocation
+ * fails; disks registered before the failure stay registered.
+ */
+static int
+vbdev_gpt_create_bdevs(struct spdk_gpt_bdev *gpt_bdev)
+{
+	uint32_t num_partition_entries;
+	uint64_t i, head_lba_start, head_lba_end;
+	struct spdk_gpt_partition_entry *p;
+	struct gpt_partition_disk *d;
+	struct spdk_bdev *base_bdev = gpt_bdev->bdev;
+	struct spdk_gpt *gpt;
+	int rc;
+
+	gpt = &gpt_bdev->gpt;
+	num_partition_entries = from_le32(&gpt->header->num_partition_entries);
+	head_lba_start = from_le64(&gpt->header->first_usable_lba);
+	head_lba_end = from_le64(&gpt->header->last_usable_lba);
+
+	for (i = 0; i < num_partition_entries; i++) {
+		p = &gpt->partitions[i];
+		uint64_t lba_start = from_le64(&p->starting_lba);
+		uint64_t lba_end = from_le64(&p->ending_lba);
+
+		/* NOTE(review): compares unique_partition_guid with a macro named PART_TYPE - confirm. */
+		if (!SPDK_GPT_UUID_EQUAL(&gpt->partitions[i].unique_partition_guid,
+					 &SPDK_GPT_PART_TYPE_GUID) ||
+		    lba_start == 0) {
+			continue;
+		}
+		/* An inverted range would make the block count computed below wrap around. */
+		if (lba_end < lba_start ||
+		    lba_start < head_lba_start || lba_end > head_lba_end) {
+			continue;
+		}
+
+		d = calloc(1, sizeof(*d));
+		if (!d) {
+			SPDK_ERRLOG("Memory allocation failure\n");
+			return -1;
+		}
+
+		/* Copy properties of the base bdev */
+		d->disk.blocklen = base_bdev->blocklen;
+		d->disk.write_cache = base_bdev->write_cache;
+		d->disk.need_aligned_buffer = base_bdev->need_aligned_buffer;
+		d->disk.max_unmap_bdesc_count = base_bdev->max_unmap_bdesc_count;
+
+		/* index start at 1 instead of 0 to match the existing style */
+		d->disk.name = spdk_sprintf_alloc("%sp%" PRIu64, spdk_bdev_get_name(base_bdev), i + 1);
+		if (!d->disk.name) {
+			free(d);
+			SPDK_ERRLOG("Failed to allocate disk name\n");
+			return -1;
+		}
+
+		d->disk.product_name = "GPT Disk";
+		d->base_bdev = base_bdev;
+		d->offset_bytes = lba_start * gpt->sector_size;
+		d->offset_blocks = lba_start;
+		/* ending_lba is the last block of the partition, so the count is inclusive. */
+		d->disk.blockcnt = lba_end - lba_start + 1;
+		d->disk.ctxt = d;
+		d->disk.fn_table = &vbdev_gpt_fn_table;
+
+		SPDK_TRACELOG(SPDK_TRACE_VBDEV_GPT, "gpt vbdev %s: base bdev: %s offset_bytes: "
+			      "%" PRIu64 " offset_blocks: %" PRIu64 "\n",
+			      d->disk.name, spdk_bdev_get_name(base_bdev), d->offset_bytes, d->offset_blocks);
+
+		rc = vbdev_gpt_base_get_ref(gpt_bdev, d);
+		if (rc < 0) {
+			free(d->disk.name);
+			free(d);
+			return -1;
+		}
+
+		spdk_vbdev_register(&d->disk, &base_bdev, 1);
+
+		TAILQ_INSERT_TAIL(&g_gpt_partition_disks, d, tailq);
+	}
+
+	return 0;
+}
+
+/*
+ * Completion callback of the read started by vbdev_gpt_read_gpt(): parse the
+ * buffer, create the partition bdevs and, once every probed base bdev has
+ * completed, let the next vbdev module initialize.
+ */
+static void
+spdk_gpt_bdev_complete(struct spdk_bdev_io *bdev_io, bool status, void *arg)
+{
+	struct spdk_gpt_bdev *gpt_bdev = (struct spdk_gpt_bdev *)arg;
+	static int bdev_init_num = 0;
+	int rc;
+
+	/* free the ch and also close the bdev_desc */
+	spdk_put_io_channel(gpt_bdev->ch);
+	gpt_bdev->ch = NULL;
+	spdk_bdev_close(gpt_bdev->bdev_desc);
+	gpt_bdev->bdev_desc = NULL;
+
+	bdev_init_num++;
+	/* status is a bool here, so it must not be compared with an I/O status code. */
+	if (!status) {
+		SPDK_ERRLOG("Gpt: bdev=%s io error status=%d\n",
+			    spdk_bdev_get_name(gpt_bdev->bdev), status);
+		goto end;
+	}
+
+	rc = spdk_gpt_parse(&gpt_bdev->gpt);
+	if (rc) {
+		SPDK_TRACELOG(SPDK_TRACE_VBDEV_GPT, "Failed to parse gpt\n");
+		goto end;
+	}
+
+	rc = vbdev_gpt_create_bdevs(gpt_bdev);
+	if (rc < 0) {
+		SPDK_TRACELOG(SPDK_TRACE_VBDEV_GPT, "Failed to split dev=%s by gpt table\n",
+			      spdk_bdev_get_name(gpt_bdev->bdev));
+	}
+
+end:
+	spdk_bdev_free_io(bdev_io);
+	if (gpt_bdev->ref == 0) {
+		/* If no gpt_partition_disk instances were created, free the base context */
+		spdk_gpt_bdev_free(gpt_bdev);
+	}
+
+	if (!g_gpt_init_done) {
+		/* Call next vbdev module init after the last gpt creation */
+		if (bdev_init_num == g_gpt_base_num) {
+			g_gpt_init_done = true;
+			spdk_vbdev_module_init_next(0);
+		}
+	}
+}
+
+/*
+ * Set up a base context for bdev and read its first SPDK_GPT_BUFFER_SIZE bytes;
+ * spdk_gpt_bdev_complete() takes over when the read completes.
+ * Returns 0 if the read was submitted, -1 otherwise.
+ */
+static int
+vbdev_gpt_read_gpt(struct spdk_bdev *bdev)
+{
+	struct spdk_gpt_bdev *gpt_bdev;
+	int rc;
+
+	gpt_bdev = spdk_gpt_base_bdev_init(bdev);
+	if (!gpt_bdev) {
+		SPDK_ERRLOG("Cannot allocated gpt_bdev\n");
+		return -1;
+	}
+
+	rc = spdk_bdev_read(gpt_bdev->bdev, gpt_bdev->ch, gpt_bdev->gpt.buf, 0, SPDK_GPT_BUFFER_SIZE,
+			    spdk_gpt_bdev_complete, gpt_bdev);
+	if (rc < 0) {
+		spdk_gpt_bdev_free(gpt_bdev);
+		SPDK_ERRLOG("Failed to send bdev_io command\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Module init. Unless the "Gpt" config section sets "Disable", probe every bdev
+ * queued by vbdev_gpt_register(). The next vbdev module is started from here if
+ * no probe is outstanding, and from spdk_gpt_bdev_complete() otherwise.
+ */
+static void
+vbdev_gpt_init(void)
+{
+	struct spdk_bdev *base_bdev, *tmp;
+	int rc = 0;
+	struct spdk_conf_section *sp = spdk_conf_find_section(NULL, "Gpt");
+
+	if (sp && spdk_conf_section_get_boolval(sp, "Disable", false)) {
+		/* Disable Gpt probe */
+		g_gpt_disabled = true;
+		goto end;
+	}
+
+	TAILQ_FOREACH_SAFE(base_bdev, &g_bdevs, link, tmp) {
+		TAILQ_REMOVE(&g_bdevs, base_bdev, link);
+		rc = vbdev_gpt_read_gpt(base_bdev);
+		if (rc) {
+			SPDK_ERRLOG("Failed to read info from bdev %s\n",
+				    spdk_bdev_get_name(base_bdev));
+			continue;
+		}
+		g_gpt_base_num++;
+	}
+
+	if (!g_gpt_base_num) {
+		g_gpt_init_done = true;
+	}
+end:
+	/* if no gpt bdev num is counted, just call vbdev_module_init_next */
+	if (!g_gpt_base_num) {
+		spdk_vbdev_module_init_next(rc);
+	}
+}
+
+/* Module teardown: free every partition disk that is still on the global list. */
+static void
+vbdev_gpt_fini(void)
+{
+	struct gpt_partition_disk *gpt_partition_disk, *tmp;
+
+	TAILQ_FOREACH_SAFE(gpt_partition_disk, &g_gpt_partition_disks, tailq, tmp) {
+		vbdev_gpt_free(gpt_partition_disk);
+	}
+}
+
+/* Queue bdevs announced before init has finished so that vbdev_gpt_init() can probe them. */
+static void
+vbdev_gpt_register(struct spdk_bdev *bdev)
+{
+	if (g_gpt_disabled) {
+		return;
+	}
+
+	if (!g_gpt_init_done) {
+		TAILQ_INSERT_TAIL(&g_bdevs, bdev, link);
+	}
+}
+
+static int
+vbdev_gpt_get_ctx_size(void)
+{
+	/*
+	 * Note: this context is only used for RESET operations, since it is the only
+	 *  I/O type that does not just resubmit to the base bdev.
+	 */
+	return sizeof(struct spdk_io_channel *);
+}
+
+SPDK_VBDEV_MODULE_REGISTER(vbdev_gpt_init, vbdev_gpt_fini, NULL,
+			   vbdev_gpt_get_ctx_size, vbdev_gpt_register)
+SPDK_LOG_REGISTER_TRACE_FLAG("vbdev_gpt", SPDK_TRACE_VBDEV_GPT)
diff --git a/mk/spdk.modules.mk b/mk/spdk.modules.mk
index 4e7114b26..251488619 100644
--- a/mk/spdk.modules.mk
+++ b/mk/spdk.modules.mk
@@ -31,7 +31,7 @@
 #  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #
 
-BLOCKDEV_MODULES_LIST = bdev_malloc bdev_null bdev_nvme nvme vbdev_error vbdev_split
+BLOCKDEV_MODULES_LIST = bdev_malloc bdev_null bdev_nvme nvme vbdev_error vbdev_gpt vbdev_split
 
 ifeq ($(CONFIG_RDMA),y)
 BLOCKDEV_MODULES_DEPS += -libverbs -lrdmacm
diff --git a/scripts/autotest_common.sh b/scripts/autotest_common.sh
index 9ad253b8e..b698fd0ca 100755
--- a/scripts/autotest_common.sh
+++ b/scripts/autotest_common.sh
@@ -32,6 +32,10 @@ config_params='--enable-debug --enable-werror'
 
 export UBSAN_OPTIONS='halt_on_error=1:print_stacktrace=1:abort_on_error=1'
 
+export SPDK_GPT_UUID=`grep SPDK_GPT_PART_TYPE_GUID $rootdir/lib/bdev/gpt/gpt.h \
+	| awk -F "(" '{ print $2}' | sed 's/)//g' \
+	| awk -F ", " '{ print $1 "-" $2 "-" $3 "-" $4 "-" $5}' | sed 's/0x//g'`
+
 # Override the default NRHUGE in scripts/setup.sh
 export NRHUGE=4096
 
diff --git a/test/lib/bdev/blockdev.sh b/test/lib/bdev/blockdev.sh
index 79e8df2bb..0c7e50607 100755
--- a/test/lib/bdev/blockdev.sh
+++ b/test/lib/bdev/blockdev.sh
@@ -17,6 +17,29 @@ 
timing_enter bounds
 $testdir/bdevio/bdevio $testdir/bdev.conf
 timing_exit bounds
 
+if [ "$(uname -s)" = Linux ] && [ -x /usr/sbin/sgdisk ]; then
+	echo "[Rpc]" >> $testdir/bdev.conf
+	echo " Enable Yes" >> $testdir/bdev.conf
+
+	if grep -q "Nvme0" $testdir/bdev.conf; then
+		modprobe nbd
+		$testdir/nbd/nbd -c $testdir/bdev.conf -b Nvme0n1 -n /dev/nbd0 &
+		nbd_pid=$!
+		echo "Process nbd pid: $nbd_pid"
+		waitforlisten $nbd_pid 5260
+		# if waitforbdev returns 1 the error trap fires, so that case needs no handling here
+		waitforbdev Nvme0n1 $rootdir/scripts/rpc.py
+
+		if [ -e /dev/nbd0 ]; then
+			parted -s /dev/nbd0 mklabel gpt mkpart primary '0%' '50%' mkpart primary '50%' '100%'
+			# change the GUID of both partitions to the SPDK GUID value
+			/usr/sbin/sgdisk -u 1:$SPDK_GPT_UUID /dev/nbd0
+			/usr/sbin/sgdisk -u 2:$SPDK_GPT_UUID /dev/nbd0
+		fi
+		killprocess $nbd_pid
+	fi
+fi
+
 timing_enter verify
 $testdir/bdevperf/bdevperf -c $testdir/bdev.conf -q 32 -s 4096 -w verify -t 1
 timing_exit verify