From 7fabdd12b69e8e5b74c5eb8cc7480f15f77b52c0 Mon Sep 17 00:00:00 2001 From: Alexey Marchuk Date: Wed, 21 Oct 2020 18:32:01 +0300 Subject: [PATCH] rdma: Add functions to support memory translation This is the first commit in a series whose aim is to unify MR registration and memory translation in the NVMe-oF target and NVMe initiator RDMA transports. Subsequent patches will use the new functions in the RDMA transports. Change-Id: I93153d497fc4554ced14edbe545961b78bda91e3 Signed-off-by: Alexey Marchuk Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/5121 Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins Reviewed-by: Jim Harris Reviewed-by: Shuhei Matsumoto --- include/spdk_internal/rdma.h | 82 +++++++++++++ lib/rdma/Makefile | 8 +- lib/rdma/common.c | 222 +++++++++++++++++++++++++++++++++++ lib/rdma/spdk_rdma.map | 3 + 4 files changed, 312 insertions(+), 3 deletions(-) create mode 100644 lib/rdma/common.c diff --git a/include/spdk_internal/rdma.h b/include/spdk_internal/rdma.h index 4a6d5104b..bf58de05e 100644 --- a/include/spdk_internal/rdma.h +++ b/include/spdk_internal/rdma.h @@ -38,6 +38,9 @@ #include #include +/* Contains hooks definition */ +#include "spdk/nvme.h" + struct spdk_rdma_qp_init_attr { void *qp_context; struct ibv_cq *send_cq; @@ -58,6 +61,23 @@ struct spdk_rdma_qp { struct spdk_rdma_send_wr_list send_wrs; }; +struct spdk_rdma_mem_map; + +union spdk_rdma_mr { + struct ibv_mr *mr; + uint64_t key; +}; + +enum SPDK_RDMA_TRANSLATION_TYPE { + SPDK_RDMA_TRANSLATION_MR = 0, + SPDK_RDMA_TRANSLATION_KEY +}; + +struct spdk_rdma_memory_translation { + union spdk_rdma_mr mr_or_key; + uint8_t translation_type; +}; + /** * Create RDMA provider specific qpair * \param cm_id Pointer to RDMACM cm_id @@ -114,4 +134,66 @@ bool spdk_rdma_qp_queue_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_s */ int spdk_rdma_qp_flush_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr **bad_wr); +/** + * Create a memory map 
which is used to register Memory Regions and perform address -> memory + * key translations + * + * \param pd Protection Domain which will be used to create Memory Regions + * \param hooks Optional hooks which are used to create Protection Domain or get RKey + * \return Pointer to memory map or NULL on failure + */ +struct spdk_rdma_mem_map *spdk_rdma_create_mem_map(struct ibv_pd *pd, + struct spdk_nvme_rdma_hooks *hooks); + +/** + * Free previously allocated memory map + * + * \param map Pointer to memory map to free + */ +void spdk_rdma_free_mem_map(struct spdk_rdma_mem_map **map); + +/** + * Get a translation for the given address and length. + * + * Note: the user of this function should use address returned in \b translation structure + * + * \param map Pointer to translation map + * \param address Memory address for translation + * \param length Length of the memory region to translate + * \param[in,out] translation Pointer to translation result to be filled by this function + * \retval -EINVAL if translation is not found + * \retval -ERANGE if requested address + length crosses Memory Region boundary + * \retval 0 translation succeeded + */ +int spdk_rdma_get_translation(struct spdk_rdma_mem_map *map, void *address, + size_t length, struct spdk_rdma_memory_translation *translation); + +/** + * Helper function for retrieving Local Memory Key. Should be applied to a translation + * returned by \b spdk_rdma_get_translation + * + * \param translation Memory translation + * \return Local Memory Key + */ +static inline uint32_t spdk_rdma_memory_translation_get_lkey(struct spdk_rdma_memory_translation + *translation) +{ + return translation->translation_type == SPDK_RDMA_TRANSLATION_MR ? + translation->mr_or_key.mr->lkey : (uint32_t)translation->mr_or_key.key; +} + +/** + * Helper function for retrieving Remote Memory Key. 
Should be applied to a translation + * returned by \b spdk_rdma_get_translation + * + * \param translation Memory translation + * \return Remote Memory Key + */ +static inline uint32_t spdk_rdma_memory_translation_get_rkey(struct spdk_rdma_memory_translation + *translation) +{ + return translation->translation_type == SPDK_RDMA_TRANSLATION_MR ? + translation->mr_or_key.mr->rkey : (uint32_t)translation->mr_or_key.key; +} + #endif /* SPDK_RDMA_H */ diff --git a/lib/rdma/Makefile b/lib/rdma/Makefile index e6374557d..1403aff7b 100644 --- a/lib/rdma/Makefile +++ b/lib/rdma/Makefile @@ -35,16 +35,18 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk SO_VER := 1 -SO_MINOR := 0 +SO_MINOR := 1 SPDK_MAP_FILE = $(abspath $(CURDIR)/spdk_rdma.map) LIBNAME = rdma +C_SRCS = common.c + ifeq ($(CONFIG_RDMA_PROV),verbs) -C_SRCS = rdma_verbs.c +C_SRCS += rdma_verbs.c else ifeq ($(CONFIG_RDMA_PROV),mlx5_dv) -C_SRCS = rdma_mlx5_dv.c +C_SRCS += rdma_mlx5_dv.c LOCAL_SYS_LIBS += -lmlx5 else $(error Wrong RDMA provider specified: $(CONFIG_RDMA_PROV)) diff --git a/lib/rdma/common.c b/lib/rdma/common.c new file mode 100644 index 000000000..21bd51061 --- /dev/null +++ b/lib/rdma/common.c @@ -0,0 +1,222 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/log.h" +#include "spdk/env.h" +#include "spdk/likely.h" + +#include "spdk_internal/rdma.h" +#include "spdk_internal/assert.h" + +struct spdk_rdma_mem_map { + struct spdk_mem_map *map; + struct ibv_pd *pd; + struct spdk_nvme_rdma_hooks *hooks; + uint32_t ref_count; + LIST_ENTRY(spdk_rdma_mem_map) link; +}; + +static LIST_HEAD(, spdk_rdma_mem_map) g_rdma_mr_maps = LIST_HEAD_INITIALIZER(&g_rdma_mr_maps); +static pthread_mutex_t g_rdma_mr_maps_mutex = PTHREAD_MUTEX_INITIALIZER; + +static int +rdma_mem_notify(void *cb_ctx, struct spdk_mem_map *map, + enum spdk_mem_map_notify_action action, + void *vaddr, size_t size) +{ + struct spdk_rdma_mem_map *rmap = cb_ctx; + struct ibv_pd *pd = rmap->pd; + struct ibv_mr *mr; + int rc; + + switch (action) { + case SPDK_MEM_MAP_NOTIFY_REGISTER: + if (rmap->hooks && rmap->hooks->get_rkey) { + rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, rmap->hooks->get_rkey(pd, vaddr, + size)); + } else { + mr = ibv_reg_mr(pd, vaddr, size, + IBV_ACCESS_LOCAL_WRITE | + IBV_ACCESS_REMOTE_READ | + IBV_ACCESS_REMOTE_WRITE); + if (mr == NULL) { + SPDK_ERRLOG("ibv_reg_mr() failed\n"); + return -1; + } else { + rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr); + } + } + break; + case SPDK_MEM_MAP_NOTIFY_UNREGISTER: + if (rmap->hooks == NULL || rmap->hooks->get_rkey == NULL) { + mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL); + if (mr) { + ibv_dereg_mr(mr); + } + } + rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size); + break; + default: + SPDK_UNREACHABLE(); + } + + return rc; +} + +static int +rdma_check_contiguous_entries(uint64_t addr_1, uint64_t addr_2) +{ + /* Two contiguous mappings will point to the same address which is the start of the RDMA MR. 
*/ + return addr_1 == addr_2; +} + +const struct spdk_mem_map_ops g_rdma_map_ops = { + .notify_cb = rdma_mem_notify, + .are_contiguous = rdma_check_contiguous_entries +}; + +static void +_rdma_free_mem_map(struct spdk_rdma_mem_map *map) +{ + assert(map); + + if (map->hooks) { + spdk_free(map); + } else { + free(map); + } +} + +struct spdk_rdma_mem_map * +spdk_rdma_create_mem_map(struct ibv_pd *pd, struct spdk_nvme_rdma_hooks *hooks) +{ + struct spdk_rdma_mem_map *map; + + pthread_mutex_lock(&g_rdma_mr_maps_mutex); + /* Look up existing mem map registration for this pd */ + LIST_FOREACH(map, &g_rdma_mr_maps, link) { + if (map->pd == pd) { + map->ref_count++; + pthread_mutex_unlock(&g_rdma_mr_maps_mutex); + return map; + } + } + + if (hooks) { + map = spdk_zmalloc(sizeof(*map), 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); + } else { + map = calloc(1, sizeof(*map)); + } + if (!map) { + pthread_mutex_unlock(&g_rdma_mr_maps_mutex); + SPDK_ERRLOG("Memory allocation failed\n"); + return NULL; + } + map->pd = pd; + map->ref_count = 1; + map->hooks = hooks; + map->map = spdk_mem_map_alloc(0, &g_rdma_map_ops, map); + if (!map->map) { + SPDK_ERRLOG("Unable to create memory map\n"); + _rdma_free_mem_map(map); + pthread_mutex_unlock(&g_rdma_mr_maps_mutex); + return NULL; + } + LIST_INSERT_HEAD(&g_rdma_mr_maps, map, link); + + pthread_mutex_unlock(&g_rdma_mr_maps_mutex); + + return map; +} + +void +spdk_rdma_free_mem_map(struct spdk_rdma_mem_map **_map) +{ + struct spdk_rdma_mem_map *map; + + if (!_map) { + return; + } + + map = *_map; + if (!map) { + return; + } + *_map = NULL; + + pthread_mutex_lock(&g_rdma_mr_maps_mutex); + assert(map->ref_count > 0); + map->ref_count--; + if (map->ref_count != 0) { + pthread_mutex_unlock(&g_rdma_mr_maps_mutex); + return; + } + + LIST_REMOVE(map, link); + pthread_mutex_unlock(&g_rdma_mr_maps_mutex); + if (map->map) { + spdk_mem_map_free(&map->map); + } + _rdma_free_mem_map(map); +} + +int +spdk_rdma_get_translation(struct 
spdk_rdma_mem_map *map, void *address, + size_t length, struct spdk_rdma_memory_translation *translation) +{ + uint64_t real_length = length; + + assert(map); + assert(address); + assert(translation); + + if (map->hooks && map->hooks->get_rkey) { + translation->translation_type = SPDK_RDMA_TRANSLATION_KEY; + translation->mr_or_key.key = spdk_mem_map_translate(map->map, (uint64_t)address, &real_length); + } else { + translation->translation_type = SPDK_RDMA_TRANSLATION_MR; + translation->mr_or_key.mr = (struct ibv_mr *)spdk_mem_map_translate(map->map, (uint64_t)address, + &real_length); + if (spdk_unlikely(!translation->mr_or_key.mr)) { + SPDK_ERRLOG("No translation for ptr %p, size %zu\n", address, length); + return -EINVAL; + } + } + + if (spdk_unlikely(real_length < length)) { + SPDK_ERRLOG("Data buffer %p length %zu split over multiple RDMA Memory Regions\n", address, length); + return -ERANGE; + } + + return 0; +} diff --git a/lib/rdma/spdk_rdma.map b/lib/rdma/spdk_rdma.map index 9268a2191..a39c8dbf2 100644 --- a/lib/rdma/spdk_rdma.map +++ b/lib/rdma/spdk_rdma.map @@ -9,6 +9,9 @@ spdk_rdma_qp_disconnect; spdk_rdma_qp_queue_send_wrs; spdk_rdma_qp_flush_send_wrs; + spdk_rdma_create_mem_map; + spdk_rdma_free_mem_map; + spdk_rdma_get_translation; local: *; };