diff --git a/CHANGELOG.md b/CHANGELOG.md
index c3554d601..d48eb780a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 ## v21.10: (Upcoming Release)
 
+### bdev
+
+New API `spdk_bdev_get_memory_domains` has been added. It allows getting the SPDK memory domains used by a bdev.
+
 ### dma
 
 A new library, lib/dma, has been added. This library provides the necessary infrastructure for
diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h
index a7fabc790..abd77d9cd 100644
--- a/include/spdk/bdev.h
+++ b/include/spdk/bdev.h
@@ -3,6 +3,7 @@
  *
  *   Copyright (c) Intel Corporation. All rights reserved.
  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
+ *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -80,6 +81,9 @@ struct spdk_bdev_media_event {
  */
 struct spdk_bdev;
 
+/** Forward declaration of spdk memory domain */
+struct spdk_memory_domain;
+
 /**
  * Block device remove callback.
  *
@@ -1748,6 +1752,25 @@ void spdk_bdev_histogram_get(struct spdk_bdev *bdev, struct spdk_histogram_data
 size_t spdk_bdev_get_media_events(struct spdk_bdev_desc *bdev_desc,
 				  struct spdk_bdev_media_event *events, size_t max_events);
 
+/**
+ * Get SPDK memory domains used by the given bdev. If bdev reports that it uses memory domains
+ * that means that it can work with data buffers located in those memory domains.
+ *
+ * The user can call this function with \b domains set to NULL and \b array_size set to 0 to get the
+ * number of memory domains used by bdev.
+ *
+ * \param bdev Block device
+ * \param domains Pointer to an array of memory domains to be filled by this function. The user should allocate a big enough
+ * array to keep all memory domains used by bdev and all underlying bdevs
+ * \param array_size size of \b domains array
+ * \return the number of memory domains used by bdev or negated errno. If the returned value is bigger than \b array_size passed by the user
+ * then the user should increase the size of \b domains array and call this function again. There are no guarantees that
+ * the content of \b domains array is valid in that case.
+ *         -EINVAL if input parameters were invalid
+ */
+int spdk_bdev_get_memory_domains(struct spdk_bdev *bdev, struct spdk_memory_domain **domains,
+				 int array_size);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h
index 1b44011f1..0c38447a1 100644
--- a/include/spdk/bdev_module.h
+++ b/include/spdk/bdev_module.h
@@ -3,6 +3,7 @@
  *
  *   Copyright (c) Intel Corporation.
  *   All rights reserved.
+ *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -246,6 +247,12 @@ struct spdk_bdev_fn_table {
 
 	/** Get bdev module context. */
 	void *(*get_module_ctx)(void *ctx);
+
+	/** Get memory domains used by bdev. Optional - may be NULL.
+	 * Vbdev module implementation should call \ref spdk_bdev_get_memory_domains for the underlying bdev.
+	 * Vbdev module must inspect the types of memory domains returned by the base bdev and report only those
+	 * memory domains that it can work with. */
+	int (*get_memory_domains)(void *ctx, struct spdk_memory_domain **domains, int array_size);
 };
 
 /** bdev I/O completion status */
diff --git a/lib/bdev/Makefile b/lib/bdev/Makefile
index 383332262..b2f2660bf 100644
--- a/lib/bdev/Makefile
+++ b/lib/bdev/Makefile
@@ -34,7 +34,7 @@
 SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
 include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
 
-SO_VER := 7
+SO_VER := 8
 SO_MINOR := 0
 
 ifeq ($(CONFIG_VTUNE),y)
diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c
index df5898d1e..76744e973 100644
--- a/lib/bdev/bdev.c
+++ b/lib/bdev/bdev.c
@@ -3,6 +3,7 @@
  *
  *   Copyright (c) Intel Corporation. All rights reserved.
* Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -7007,6 +7008,21 @@ bdev_unlock_lba_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *_ch, return 0; } +int +spdk_bdev_get_memory_domains(struct spdk_bdev *bdev, struct spdk_memory_domain **domains, + int array_size) +{ + if (!bdev) { + return -EINVAL; + } + + if (bdev->fn_table->get_memory_domains) { + return bdev->fn_table->get_memory_domains(bdev->ctxt, domains, array_size); + } + + return 0; +} + SPDK_LOG_REGISTER_COMPONENT(bdev) SPDK_TRACE_REGISTER_FN(bdev_trace, "bdev", TRACE_GROUP_BDEV) diff --git a/lib/bdev/spdk_bdev.map b/lib/bdev/spdk_bdev.map index 2e3df5fd2..f7159e884 100644 --- a/lib/bdev/spdk_bdev.map +++ b/lib/bdev/spdk_bdev.map @@ -94,6 +94,7 @@ spdk_bdev_histogram_enable; spdk_bdev_histogram_get; spdk_bdev_get_media_events; + spdk_bdev_get_memory_domains; # Public functions in bdev_module.h spdk_bdev_register; diff --git a/lib/nvme/nvme_ctrlr.c b/lib/nvme/nvme_ctrlr.c index 036dd61c7..053191f49 100644 --- a/lib/nvme/nvme_ctrlr.c +++ b/lib/nvme/nvme_ctrlr.c @@ -2739,6 +2739,7 @@ nvme_ctrlr_construct_namespaces(struct spdk_nvme_ctrlr *ctrlr) return 0; } + SPDK_NOTICELOG("nn %u, mem size %lu\n", nn, nn * sizeof(struct spdk_nvme_ns)); ctrlr->ns = spdk_zmalloc(nn * sizeof(struct spdk_nvme_ns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); if (ctrlr->ns == NULL) { diff --git a/module/bdev/delay/vbdev_delay.c b/module/bdev/delay/vbdev_delay.c index bbab2a403..dfc4935c9 100644 --- a/module/bdev/delay/vbdev_delay.c +++ b/module/bdev/delay/vbdev_delay.c @@ -3,6 +3,7 @@ * * Copyright (c) Intel Corporation. * All rights reserved. + * Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -671,6 +672,15 @@ vbdev_delay_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx
 	/* No config per bdev needed */
 }
 
+static int
+vbdev_delay_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
+{
+	struct vbdev_delay *delay_node = (struct vbdev_delay *)ctx;
+
+	/* Delay bdev doesn't access data buffers itself, so it reports every memory domain used by base_bdev */
+	return spdk_bdev_get_memory_domains(delay_node->base_bdev, domains, array_size);
+}
+
 /* When we register our bdev this is how we specify our entry points. */
 static const struct spdk_bdev_fn_table vbdev_delay_fn_table = {
 	.destruct		= vbdev_delay_destruct,
@@ -679,6 +689,7 @@ static const struct spdk_bdev_fn_table vbdev_delay_fn_table = {
 	.get_io_channel		= vbdev_delay_get_io_channel,
 	.dump_info_json		= vbdev_delay_dump_info_json,
 	.write_config_json	= vbdev_delay_write_config_json,
+	.get_memory_domains	= vbdev_delay_get_memory_domains,
 };
 
 static void
diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c
index de9584ffc..0ef671c06 100644
--- a/module/bdev/nvme/bdev_nvme.c
+++ b/module/bdev/nvme/bdev_nvme.c
@@ -3,6 +3,7 @@
  *
  *   Copyright (c) Intel Corporation. All rights reserved.
  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
+ *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -1225,6 +1226,25 @@ _nvme_ana_state_str(enum spdk_nvme_ana_state ana_state)
 	}
 }
 
+static int
+bdev_nvme_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
+{
+	struct nvme_bdev *nbdev = ctx;
+	struct spdk_memory_domain *domain;
+
+	domain = spdk_nvme_ctrlr_get_memory_domain(nbdev->nvme_ns->ctrlr->ctrlr);
+
+	if (domain) {
+		/* Report the domain count even if the caller's array can't hold the entry */
+		if (array_size > 0 && domains) {
+			domains[0] = domain;
+		}
+		return 1;
+	}
+
+	return 0;
+}
+
 static int
 bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
 {
@@ -1383,6 +1403,7 @@ static const struct spdk_bdev_fn_table nvmelib_fn_table = {
 	.write_config_json	= bdev_nvme_write_config_json,
 	.get_spin_time		= bdev_nvme_get_spin_time,
 	.get_module_ctx		= bdev_nvme_get_module_ctx,
+	.get_memory_domains	= bdev_nvme_get_memory_domains,
 };
 
 typedef int (*bdev_nvme_parse_ana_log_page_cb)(
diff --git a/module/bdev/passthru/vbdev_passthru.c b/module/bdev/passthru/vbdev_passthru.c
index 06d245b63..6092f07bc 100644
--- a/module/bdev/passthru/vbdev_passthru.c
+++ b/module/bdev/passthru/vbdev_passthru.c
@@ -3,6 +3,7 @@
  *
  *   Copyright (c) Intel Corporation.
  *   All rights reserved.
+ *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -551,6 +552,15 @@ vbdev_passthru_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_
 	/* No config per bdev needed */
 }
 
+static int
+vbdev_passthru_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
+{
+	struct vbdev_passthru *pt_node = (struct vbdev_passthru *)ctx;
+
+	/* Passthru bdev doesn't access data buffers itself, so it reports every memory domain used by base_bdev */
+	return spdk_bdev_get_memory_domains(pt_node->base_bdev, domains, array_size);
+}
+
 /* When we register our bdev this is how we specify our entry points. */
 static const struct spdk_bdev_fn_table vbdev_passthru_fn_table = {
 	.destruct		= vbdev_passthru_destruct,
@@ -559,6 +569,7 @@ static const struct spdk_bdev_fn_table vbdev_passthru_fn_table = {
 	.get_io_channel		= vbdev_passthru_get_io_channel,
 	.dump_info_json		= vbdev_passthru_dump_info_json,
 	.write_config_json	= vbdev_passthru_write_config_json,
+	.get_memory_domains	= vbdev_passthru_get_memory_domains,
 };
 
 static void
diff --git a/test/unit/lib/bdev/bdev.c/bdev_ut.c b/test/unit/lib/bdev/bdev.c/bdev_ut.c
index 3a278d2bd..26bd4b890 100644
--- a/test/unit/lib/bdev/bdev.c/bdev_ut.c
+++ b/test/unit/lib/bdev/bdev.c/bdev_ut.c
@@ -3,6 +3,7 @@
  *
  *   Copyright (c) Intel Corporation. All rights reserved.
  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
+ *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -4771,6 +4772,52 @@ bdev_multi_allocation(void) } } +static struct spdk_memory_domain *g_bdev_memory_domain = (struct spdk_memory_domain *) 0xf00df00d; + +static int +test_bdev_get_supported_dma_device_types_op(void *ctx, struct spdk_memory_domain **domains, + int array_size) +{ + if (array_size > 0 && domains) { + domains[0] = g_bdev_memory_domain; + } + + return 1; +} + +static void +bdev_get_memory_domains(void) +{ + struct spdk_bdev_fn_table fn_table = { + .get_memory_domains = test_bdev_get_supported_dma_device_types_op + }; + struct spdk_bdev bdev = { .fn_table = &fn_table }; + struct spdk_memory_domain *domains[2] = {}; + int rc; + + /* bdev is NULL */ + rc = spdk_bdev_get_memory_domains(NULL, domains, 2); + CU_ASSERT(rc == -EINVAL); + + /* domains is NULL */ + rc = spdk_bdev_get_memory_domains(&bdev, NULL, 2); + CU_ASSERT(rc == 1); + + /* array size is 0 */ + rc = spdk_bdev_get_memory_domains(&bdev, domains, 0); + CU_ASSERT(rc == 1); + + /* get_supported_dma_device_types op is set */ + rc = spdk_bdev_get_memory_domains(&bdev, domains, 2); + CU_ASSERT(rc == 1); + CU_ASSERT(domains[0] == g_bdev_memory_domain); + + /* get_supported_dma_device_types op is not set */ + fn_table.get_memory_domains = NULL; + rc = spdk_bdev_get_memory_domains(&bdev, domains, 2); + CU_ASSERT(rc == 0); +} + int main(int argc, char **argv) { @@ -4816,6 +4863,7 @@ main(int argc, char **argv) CU_ADD_TEST(suite, bdev_write_zeroes_split_test); CU_ADD_TEST(suite, bdev_set_options_test); CU_ADD_TEST(suite, bdev_multi_allocation); + CU_ADD_TEST(suite, bdev_get_memory_domains); allocate_cores(1); allocate_threads(1); diff --git a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c index 0958f0250..01e6eee2e 100644 --- a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c +++ 
b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c @@ -3,6 +3,7 @@ * * Copyright (c) Intel Corporation. * All rights reserved. + * Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -73,6 +74,15 @@ DEFINE_STUB(spdk_nvme_ctrlr_get_flags, uint64_t, (struct spdk_nvme_ctrlr *ctrlr) DEFINE_STUB(accel_engine_create_cb, int, (void *io_device, void *ctx_buf), 0); DEFINE_STUB_V(accel_engine_destroy_cb, (void *io_device, void *ctx_buf)); +DEFINE_RETURN_MOCK(spdk_nvme_ctrlr_get_memory_domain, struct spdk_memory_domain *); + +struct spdk_memory_domain *spdk_nvme_ctrlr_get_memory_domain(const struct spdk_nvme_ctrlr *ctrlr) +{ + HANDLE_RETURN_MOCK(spdk_nvme_ctrlr_get_memory_domain); + + return NULL; +} + struct spdk_io_channel * spdk_accel_engine_get_io_channel(void) { @@ -2541,6 +2551,40 @@ fini_accel(void) spdk_io_device_unregister(g_accel_p, NULL); } +static void +test_get_memory_domains(void) +{ + struct nvme_ctrlr ctrlr = { .ctrlr = (struct spdk_nvme_ctrlr *) 0xbaadbeef }; + struct nvme_ns ns = { .ctrlr = &ctrlr }; + struct nvme_bdev nbdev = { .nvme_ns = &ns }; + struct spdk_memory_domain *domain = (struct spdk_memory_domain *) 0xf00df00d; + struct spdk_memory_domain *domains[2] = {}; + int rc = 0; + + /* nvme controller doesn't have a memory domain */ + MOCK_SET(spdk_nvme_ctrlr_get_memory_domain, NULL); + rc = bdev_nvme_get_memory_domains(&nbdev, domains, 2); + CU_ASSERT(rc == 0) + + /* nvme controller has a memory domain but array size is insufficient */ + MOCK_SET(spdk_nvme_ctrlr_get_memory_domain, domain); + rc = bdev_nvme_get_memory_domains(&nbdev, domains, 0); + CU_ASSERT(rc == 1); + + /* nvme controller has a memory domain but domains array is NULL */ + MOCK_SET(spdk_nvme_ctrlr_get_memory_domain, domain); + rc = bdev_nvme_get_memory_domains(&nbdev, domains, 0); + CU_ASSERT(rc == 1); + + /* nvme 
controller has a memory domain */ + MOCK_SET(spdk_nvme_ctrlr_get_memory_domain, domain); + rc = bdev_nvme_get_memory_domains(&nbdev, domains, 1); + CU_ASSERT(rc == 1); + CU_ASSERT(domains[0] == domain); + + MOCK_CLEAR(spdk_nvme_ctrlr_get_memory_domain); +} + int main(int argc, const char **argv) { @@ -2567,6 +2611,7 @@ main(int argc, const char **argv) CU_ADD_TEST(suite, test_bdev_unregister); CU_ADD_TEST(suite, test_compare_ns); CU_ADD_TEST(suite, test_init_ana_log_page); + CU_ADD_TEST(suite, test_get_memory_domains); CU_basic_set_mode(CU_BRM_VERBOSE);