diff --git a/include/spdk_internal/rdma.h b/include/spdk_internal/rdma.h
index 17b4ecba2..983b25645 100644
--- a/include/spdk_internal/rdma.h
+++ b/include/spdk_internal/rdma.h
@@ -273,4 +273,23 @@ spdk_rdma_memory_translation_get_rkey(struct spdk_rdma_memory_translation
 	       translation->mr_or_key.mr->rkey : (uint32_t)translation->mr_or_key.key;
 }
 
+/**
+ * Get a Protection Domain for an RDMA device context.
+ *
+ * Each successful call takes a reference on the Protection Domain, which
+ * must be released with spdk_rdma_put_pd().
+ *
+ * \param context RDMA device context
+ * \return Pointer to the Protection Domain, or NULL on failure
+ */
+struct ibv_pd *
+spdk_rdma_get_pd(struct ibv_context *context);
+
+/**
+ * Return a Protection Domain obtained by spdk_rdma_get_pd().
+ *
+ * \param pd Pointer to the Protection Domain
+ */
+void spdk_rdma_put_pd(struct ibv_pd *pd);
+
 #endif /* SPDK_RDMA_H */
diff --git a/lib/rdma/Makefile b/lib/rdma/Makefile
index 80e90d9bc..f414a9610 100644
--- a/lib/rdma/Makefile
+++ b/lib/rdma/Makefile
@@ -7,7 +7,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
 include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
 
 SO_VER := 4
-SO_MINOR := 0
+SO_MINOR := 1
 
 SPDK_MAP_FILE = $(abspath $(CURDIR)/spdk_rdma.map)
 
diff --git a/lib/rdma/common.c b/lib/rdma/common.c
index c0cc77360..9e5604f68 100644
--- a/lib/rdma/common.c
+++ b/lib/rdma/common.c
@@ -13,6 +13,17 @@
 #include "spdk_internal/rdma.h"
 #include "spdk_internal/assert.h"
 
+/* An RDMA device context together with the protection domain allocated for it. */
+struct spdk_rdma_device {
+	struct ibv_pd				*pd;
+	struct ibv_context			*context;
+	/* Number of references taken by spdk_rdma_get_pd() and not yet put. */
+	int					ref;
+	/* Set when the context disappears from rdma_get_devices() or at library teardown. */
+	bool					removed;
+	TAILQ_ENTRY(spdk_rdma_device)		tailq;
+};
+
 struct spdk_rdma_mem_map {
 	struct spdk_mem_map			*map;
 	struct ibv_pd				*pd;
@@ -22,6 +33,12 @@ struct spdk_rdma_mem_map {
 	LIST_ENTRY(spdk_rdma_mem_map) link;
 };
 
+/* Held by spdk_rdma_get_pd() and spdk_rdma_put_pd() around g_ctx_list and g_dev_list. */
+static pthread_mutex_t g_dev_mutex = PTHREAD_MUTEX_INITIALIZER;
+/* Sorted array kept from the last successful rdma_get_devices() call, or NULL. */
+static struct ibv_context **g_ctx_list = NULL;
+static TAILQ_HEAD(, spdk_rdma_device) g_dev_list = TAILQ_HEAD_INITIALIZER(g_dev_list);
+
 static LIST_HEAD(, spdk_rdma_mem_map) g_rdma_mr_maps = LIST_HEAD_INITIALIZER(&g_rdma_mr_maps);
 static pthread_mutex_t g_rdma_mr_maps_mutex = PTHREAD_MUTEX_INITIALIZER;
 
@@ -350,3 +367,259 @@ spdk_rdma_qp_flush_recv_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_recv_w
 
 	return rc;
 }
+
+/*
+ * Allocate a device object and a protection domain for the context and append
+ * the device to g_dev_list. Returns NULL if either allocation fails.
+ */
+static struct spdk_rdma_device *
+rdma_add_dev(struct ibv_context *context)
+{
+	struct spdk_rdma_device *dev;
+
+	dev = calloc(1, sizeof(*dev));
+	if (dev == NULL) {
+		SPDK_ERRLOG("Failed to allocate RDMA device object.\n");
+		return NULL;
+	}
+
+	dev->pd = ibv_alloc_pd(context);
+	if (dev->pd == NULL) {
+		SPDK_ERRLOG("ibv_alloc_pd() failed: %s (%d)\n", spdk_strerror(errno), errno);
+		free(dev);
+		return NULL;
+	}
+
+	dev->context = context;
+	TAILQ_INSERT_TAIL(&g_dev_list, dev, tailq);
+
+	return dev;
+}
+
+/* Release the device if it is marked removed and unreferenced; otherwise do nothing. */
+static void
+rdma_remove_dev(struct spdk_rdma_device *dev)
+{
+	if (!dev->removed || dev->ref > 0) {
+		return;
+	}
+
+	/* Deallocate protection domain only if the device is already removed and
+	 * there is no reference.
+	 */
+	TAILQ_REMOVE(&g_dev_list, dev, tailq);
+	ibv_dealloc_pd(dev->pd);
+	free(dev);
+}
+
+/*
+ * Mark every device bound to the context as removed and release those that are
+ * no longer referenced.
+ */
+static void
+rdma_mark_dev_removed(struct ibv_context *context)
+{
+	struct spdk_rdma_device *dev, *tmp;
+
+	TAILQ_FOREACH_SAFE(dev, &g_dev_list, tailq, tmp) {
+		if (dev->context == context) {
+			dev->removed = true;
+			rdma_remove_dev(dev);
+		}
+	}
+}
+
+/*
+ * Order two contexts by address. The addresses are compared as integers because
+ * relational operators on pointers to unrelated objects are undefined in C.
+ */
+static int
+ctx_addr_cmp(const struct ibv_context *c1, const struct ibv_context *c2)
+{
+	uintptr_t a1 = (uintptr_t)c1;
+	uintptr_t a2 = (uintptr_t)c2;
+
+	return a1 < a2 ? -1 : a1 > a2;
+}
+
+/* qsort() comparator for an array of struct ibv_context pointers. */
+static int
+ctx_cmp(const void *_c1, const void *_c2)
+{
+	const struct ibv_context *c1 = *(struct ibv_context *const *)_c1;
+	const struct ibv_context *c2 = *(struct ibv_context *const *)_c2;
+
+	return ctx_addr_cmp(c1, c2);
+}
+
+/*
+ * Reconcile g_dev_list with the devices currently returned by rdma_get_devices():
+ * add a device for every context that appeared since the previous call and
+ * retire the devices whose context disappeared.
+ *
+ * Returns 0 on success, or -ENODEV if the device array could not be obtained or
+ * was empty, in which case g_ctx_list and g_dev_list are left unchanged.
+ */
+static int
+rdma_sync_dev_list(void)
+{
+	struct ibv_context **new_ctx_list;
+	int i, j;
+	int num_devs = 0;
+
+	/*
+	 * rdma_get_devices() returns a NULL terminated array of opened RDMA devices,
+	 * and sets num_devs to the number of the returned devices.
+	 */
+	new_ctx_list = rdma_get_devices(&num_devs);
+	if (new_ctx_list == NULL) {
+		SPDK_ERRLOG("rdma_get_devices() failed: %s (%d)\n", spdk_strerror(errno), errno);
+		return -ENODEV;
+	}
+
+	if (num_devs == 0) {
+		rdma_free_devices(new_ctx_list);
+		SPDK_ERRLOG("Returned RDMA device array was empty\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * Sort new_ctx_list by addresses to update devices easily.
+	 */
+	qsort(new_ctx_list, num_devs, sizeof(struct ibv_context *), ctx_cmp);
+
+	if (g_ctx_list == NULL) {
+		/* If no old array, this is the first call. Add all devices. */
+		for (i = 0; new_ctx_list[i] != NULL; i++) {
+			rdma_add_dev(new_ctx_list[i]);
+		}
+
+		goto exit;
+	}
+
+	/*
+	 * Both arrays are sorted by address, so walk them together. If a context
+	 * exists only in the new array, create a device for it, or if a context
+	 * exists only in the old array, try removing the corresponding device.
+	 *
+	 * A failure of rdma_add_dev() is only logged. The context is still kept in
+	 * the new array, so adding it is not retried by later calls.
+	 */
+	for (i = j = 0; new_ctx_list[i] != NULL || g_ctx_list[j] != NULL;) {
+		struct ibv_context *new_ctx = new_ctx_list[i];
+		struct ibv_context *old_ctx = g_ctx_list[j];
+		int cmp;
+
+		if (old_ctx == NULL) {
+			/* The old array is exhausted: new_ctx is new. */
+			cmp = -1;
+		} else if (new_ctx == NULL) {
+			/* The new array is exhausted: old_ctx is gone. */
+			cmp = 1;
+		} else {
+			cmp = ctx_addr_cmp(new_ctx, old_ctx);
+		}
+
+		if (cmp < 0) {
+			rdma_add_dev(new_ctx);
+			i++;
+		} else if (cmp > 0) {
+			rdma_mark_dev_removed(old_ctx);
+			j++;
+		} else {
+			i++;
+			j++;
+		}
+	}
+
+	/* Free the old array. */
+	rdma_free_devices(g_ctx_list);
+
+exit:
+	/*
+	 * Keep the newly returned array so that allocated protection domains
+	 * are not freed unexpectedly.
+	 */
+	g_ctx_list = new_ctx_list;
+	return 0;
+}
+
+struct ibv_pd *
+spdk_rdma_get_pd(struct ibv_context *context)
+{
+	struct spdk_rdma_device *dev;
+	struct ibv_pd *pd = NULL;
+	int rc;
+
+	pthread_mutex_lock(&g_dev_mutex);
+
+	rc = rdma_sync_dev_list();
+	if (rc != 0) {
+		pthread_mutex_unlock(&g_dev_mutex);
+
+		SPDK_ERRLOG("Failed to sync RDMA device list\n");
+		return NULL;
+	}
+
+	TAILQ_FOREACH(dev, &g_dev_list, tailq) {
+		if (dev->context == context && !dev->removed) {
+			dev->ref++;
+			/* Read the PD before dropping the lock that guards the device. */
+			pd = dev->pd;
+			break;
+		}
+	}
+
+	pthread_mutex_unlock(&g_dev_mutex);
+
+	if (pd == NULL) {
+		SPDK_ERRLOG("Failed to get PD\n");
+	}
+
+	return pd;
+}
+
+void
+spdk_rdma_put_pd(struct ibv_pd *pd)
+{
+	struct spdk_rdma_device *dev;
+
+	pthread_mutex_lock(&g_dev_mutex);
+
+	TAILQ_FOREACH(dev, &g_dev_list, tailq) {
+		if (dev->pd == pd) {
+			assert(dev->ref > 0);
+			dev->ref--;
+
+			/* This may free dev, so stop iterating right after it. */
+			rdma_remove_dev(dev);
+			break;
+		}
+	}
+
+	/*
+	 * Best effort: the reference is already dropped, and a failed sync is
+	 * logged by rdma_sync_dev_list() itself.
+	 */
+	rdma_sync_dev_list();
+
+	pthread_mutex_unlock(&g_dev_mutex);
+}
+
+/* Destructor: release every device, even a referenced one, and the context array. */
+__attribute__((destructor)) static void
+_rdma_fini(void)
+{
+	struct spdk_rdma_device *dev, *tmp;
+
+	TAILQ_FOREACH_SAFE(dev, &g_dev_list, tailq, tmp) {
+		dev->removed = true;
+		dev->ref = 0;
+		rdma_remove_dev(dev);
+	}
+
+	if (g_ctx_list != NULL) {
+		rdma_free_devices(g_ctx_list);
+		g_ctx_list = NULL;
+	}
+}
diff --git a/lib/rdma/spdk_rdma.map b/lib/rdma/spdk_rdma.map
index 9043cd47b..7b4b68264 100644
--- a/lib/rdma/spdk_rdma.map
+++ b/lib/rdma/spdk_rdma.map
@@ -18,6 +18,8 @@
 	spdk_rdma_get_translation;
 	spdk_rdma_qp_queue_recv_wrs;
 	spdk_rdma_qp_flush_recv_wrs;
+	spdk_rdma_get_pd;
+	spdk_rdma_put_pd;
 
 	local: *;
 };
diff --git a/test/unit/lib/Makefile b/test/unit/lib/Makefile
index 5269b83ad..8b7ab97cb 100644
--- a/test/unit/lib/Makefile
+++ b/test/unit/lib/Makefile
@@ -13,6 +13,7 @@ DIRS-$(CONFIG_REDUCE) += reduce
 ifeq ($(OS),Linux)
 DIRS-$(CONFIG_VHOST) += vhost
 DIRS-y += ftl
+DIRS-$(CONFIG_RDMA) += rdma
 endif
 
 .PHONY: all clean $(DIRS-y)
diff --git a/test/unit/lib/rdma/Makefile b/test/unit/lib/rdma/Makefile
new file mode 100644
index 000000000..61cd47eed
--- /dev/null
+++ b/test/unit/lib/rdma/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+DIRS-y = common.c
+
+.PHONY: all clean $(DIRS-y)
+
+all: $(DIRS-y)
+clean: $(DIRS-y)
+
+include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk
diff --git a/test/unit/lib/rdma/common.c/.gitignore b/test/unit/lib/rdma/common.c/.gitignore
new file mode 100644
index 000000000..602ae4561
--- /dev/null
+++ b/test/unit/lib/rdma/common.c/.gitignore
@@ -0,0 +1 @@
+common_ut
diff --git a/test/unit/lib/rdma/common.c/Makefile b/test/unit/lib/rdma/common.c/Makefile
new file mode 100644
index 000000000..5832b9ec6
--- /dev/null
+++ b/test/unit/lib/rdma/common.c/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+TEST_FILE = common_ut.c
+
+include $(SPDK_ROOT_DIR)/mk/spdk.unittest.mk
diff --git a/test/unit/lib/rdma/common.c/common_ut.c b/test/unit/lib/rdma/common.c/common_ut.c
new file mode 100644
index 000000000..7c132920a
--- /dev/null
+++ b/test/unit/lib/rdma/common.c/common_ut.c
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ */
+
+#include "spdk/stdinc.h"
+#include "spdk_cunit.h"
+#include "spdk_internal/mock.h"
+#include "common/lib/test_env.c"
+#include "rdma/common.c"
+
+DEFINE_STUB(spdk_mem_map_alloc, struct spdk_mem_map *, (uint64_t default_translation,
+		const struct spdk_mem_map_ops *ops, void *cb_ctx), NULL);
+DEFINE_STUB_V(spdk_mem_map_free, (struct spdk_mem_map **pmap));
+DEFINE_STUB(spdk_mem_map_set_translation, int, (struct spdk_mem_map *map, uint64_t vaddr,
+		uint64_t size, uint64_t translation), 0);
+DEFINE_STUB(spdk_mem_map_clear_translation, int, (struct spdk_mem_map *map, uint64_t vaddr,
+		uint64_t size), 0);
+DEFINE_STUB(spdk_mem_map_translate, uint64_t, (const struct spdk_mem_map *map, uint64_t vaddr,
+		uint64_t *size), 0);
+
+/* A fake RDMA device; only non-removed ones are visible through the stubs below. */
+struct ut_rdma_device {
+	struct ibv_context		*context;
+	bool				removed;
+	TAILQ_ENTRY(ut_rdma_device)	tailq;
+};
+
+static TAILQ_HEAD(, ut_rdma_device) g_ut_dev_list = TAILQ_HEAD_INITIALIZER(g_ut_dev_list);
+
+/* Stub: return a malloc'ed, NULL terminated array of the non-removed fake contexts. */
+struct ibv_context **
+rdma_get_devices(int *num_devices)
+{
+	struct ibv_context **ctx_list;
+	struct ut_rdma_device *ut_dev;
+	int num_ut_devs = 0;
+	int i = 0;
+
+	TAILQ_FOREACH(ut_dev, &g_ut_dev_list, tailq) {
+		if (!ut_dev->removed) {
+			num_ut_devs++;
+		}
+	}
+
+	ctx_list = malloc(sizeof(*ctx_list) * (num_ut_devs + 1));
+	SPDK_CU_ASSERT_FATAL(ctx_list);
+
+	TAILQ_FOREACH(ut_dev, &g_ut_dev_list, tailq) {
+		if (!ut_dev->removed) {
+			ctx_list[i++] = ut_dev->context;
+		}
+	}
+	ctx_list[i] = NULL;
+
+	if (num_devices) {
+		*num_devices = num_ut_devs;
+	}
+
+	return ctx_list;
+}
+
+void
+rdma_free_devices(struct ibv_context **list)
+{
+	free(list);
+}
+
+/* Stub: allocate a PD only for a context that belongs to a non-removed fake device. */
+struct ibv_pd *
+ibv_alloc_pd(struct ibv_context *context)
+{
+	struct ibv_pd *pd;
+	struct ut_rdma_device *ut_dev;
+
+	TAILQ_FOREACH(ut_dev, &g_ut_dev_list, tailq) {
+		if (ut_dev->context == context && !ut_dev->removed) {
+			break;
+		}
+	}
+
+	if (!ut_dev) {
+		return NULL;
+	}
+
+	pd = calloc(1, sizeof(*pd));
+	SPDK_CU_ASSERT_FATAL(pd);
+
+	pd->context = context;
+
+	return pd;
+}
+
+int
+ibv_dealloc_pd(struct ibv_pd *pd)
+{
+	free(pd);
+
+	return 0;
+}
+
+static struct ut_rdma_device *
+ut_rdma_add_dev(struct ibv_context *context)
+{
+	struct ut_rdma_device *ut_dev;
+
+	ut_dev = calloc(1, sizeof(*ut_dev));
+	if (!ut_dev) {
+		return NULL;
+	}
+
+	ut_dev->context = context;
+	TAILQ_INSERT_TAIL(&g_ut_dev_list, ut_dev, tailq);
+
+	return ut_dev;
+}
+
+static void
+ut_rdma_remove_dev(struct ut_rdma_device *ut_dev)
+{
+	TAILQ_REMOVE(&g_ut_dev_list, ut_dev, tailq);
+	free(ut_dev);
+}
+
+/* Look up a device in g_dev_list by context, whether or not it is marked removed. */
+static struct spdk_rdma_device *
+_rdma_get_dev(struct ibv_context *context)
+{
+	struct spdk_rdma_device *dev;
+
+	TAILQ_FOREACH(dev, &g_dev_list, tailq) {
+		if (dev->context == context) {
+			break;
+		}
+	}
+
+	return dev;
+}
+
+static void
+test_spdk_rdma_pd(void)
+{
+	struct ut_rdma_device *ut_dev0, *ut_dev1, *ut_dev2;
+	struct ibv_pd *pd1, *pd1_1, *pd2;
+
+	ut_dev0 = ut_rdma_add_dev((struct ibv_context *)0xface);
+	SPDK_CU_ASSERT_FATAL(ut_dev0 != NULL);
+
+	ut_dev1 = ut_rdma_add_dev((struct ibv_context *)0xc0ffee);
+	SPDK_CU_ASSERT_FATAL(ut_dev1 != NULL);
+
+	ut_dev2 = ut_rdma_add_dev((struct ibv_context *)0xf00d);
+	SPDK_CU_ASSERT_FATAL(ut_dev2 != NULL);
+
+	/* There are ut_dev0 and ut_dev1. */
+	ut_dev2->removed = true;
+
+	/* Call spdk_rdma_get_pd() to non-existent ut_dev2. */
+	pd2 = spdk_rdma_get_pd(ut_dev2->context);
+
+	/* Then, spdk_rdma_get_pd() should return NULL and g_dev_list should have dev0 and dev1. */
+	CU_ASSERT(pd2 == NULL);
+	CU_ASSERT(_rdma_get_dev(ut_dev0->context) != NULL);
+	CU_ASSERT(_rdma_get_dev(ut_dev1->context) != NULL);
+	CU_ASSERT(_rdma_get_dev(ut_dev2->context) == NULL);
+
+	/* Remove ut_dev0 and add ut_dev2. */
+	ut_dev0->removed = true;
+	ut_dev2->removed = false;
+
+	/* Call spdk_rdma_get_pd() to ut_dev1. */
+	pd1 = spdk_rdma_get_pd(ut_dev1->context);
+
+	/* Then, spdk_rdma_get_pd() should return pd1 and g_dev_list should have dev1 and dev2. */
+	CU_ASSERT(pd1 != NULL);
+	CU_ASSERT(_rdma_get_dev(ut_dev0->context) == NULL);
+	CU_ASSERT(_rdma_get_dev(ut_dev1->context) != NULL);
+	CU_ASSERT(_rdma_get_dev(ut_dev2->context) != NULL);
+
+	/* Remove ut_dev1. */
+	ut_dev1->removed = true;
+
+	/* Call spdk_rdma_get_pd() again to ut_dev1 which does not exist anymore. */
+	pd1_1 = spdk_rdma_get_pd(ut_dev1->context);
+
+	/* Then, spdk_rdma_get_pd() should return NULL and g_dev_list should still have dev1. */
+	CU_ASSERT(pd1_1 == NULL);
+	CU_ASSERT(_rdma_get_dev(ut_dev0->context) == NULL);
+	CU_ASSERT(_rdma_get_dev(ut_dev1->context) != NULL);
+	CU_ASSERT(_rdma_get_dev(ut_dev2->context) != NULL);
+
+	/* Call spdk_rdma_put_pd() to pd1. */
+	spdk_rdma_put_pd(pd1);
+
+	/* Then, dev1 should be removed from g_dev_list. */
+	CU_ASSERT(_rdma_get_dev(ut_dev0->context) == NULL);
+	CU_ASSERT(_rdma_get_dev(ut_dev1->context) == NULL);
+	CU_ASSERT(_rdma_get_dev(ut_dev2->context) != NULL);
+
+	/* Call spdk_rdma_get_pd() to ut_dev2. */
+	pd2 = spdk_rdma_get_pd(ut_dev2->context);
+
+	/* spdk_rdma_get_pd() should succeed and g_dev_list should still have dev2
+	 * even after spdk_rdma_put_pd() is called to pd2.
+	 */
+	CU_ASSERT(pd2 != NULL);
+
+	spdk_rdma_put_pd(pd2);
+
+	CU_ASSERT(_rdma_get_dev(ut_dev0->context) == NULL);
+	CU_ASSERT(_rdma_get_dev(ut_dev1->context) == NULL);
+	CU_ASSERT(_rdma_get_dev(ut_dev2->context) != NULL);
+
+	_rdma_fini();
+
+	ut_rdma_remove_dev(ut_dev0);
+	ut_rdma_remove_dev(ut_dev1);
+	ut_rdma_remove_dev(ut_dev2);
+}
+
+int
+main(int argc, char **argv)
+{
+	CU_pSuite suite = NULL;
+	unsigned int num_failures;
+
+	CU_set_error_action(CUEA_ABORT);
+	CU_initialize_registry();
+
+	suite = CU_add_suite("rdma_common", NULL, NULL);
+	CU_ADD_TEST(suite, test_spdk_rdma_pd);
+
+	CU_basic_set_mode(CU_BRM_VERBOSE);
+	CU_basic_run_tests();
+	num_failures = CU_get_number_of_failures();
+	CU_cleanup_registry();
+	return num_failures;
+}
diff --git a/test/unit/unittest.sh b/test/unit/unittest.sh
index ae96b1920..9c3550c21 100755
--- a/test/unit/unittest.sh
+++ b/test/unit/unittest.sh
@@ -231,6 +231,7 @@ run_test "unittest_lvol" $valgrind $testdir/lib/lvol/lvol.c/lvol_ut
 if grep -q '#define SPDK_CONFIG_RDMA 1' $rootdir/include/spdk/config.h; then
 	run_test "unittest_nvme_rdma" $valgrind $testdir/lib/nvme/nvme_rdma.c/nvme_rdma_ut
 	run_test "unittest_nvmf_transport" $valgrind $testdir/lib/nvmf/transport.c/transport_ut
+	run_test "unittest_rdma" $valgrind $testdir/lib/rdma/common.c/common_ut
 fi
 
 if grep -q '#define SPDK_CONFIG_NVME_CUSE 1' $rootdir/include/spdk/config.h; then