diff --git a/include/spdk_internal/rdma.h b/include/spdk_internal/rdma.h index 4a6d5104b..bf58de05e 100644 --- a/include/spdk_internal/rdma.h +++ b/include/spdk_internal/rdma.h @@ -38,6 +38,9 @@ #include #include +/* Contains hooks definition */ +#include "spdk/nvme.h" + struct spdk_rdma_qp_init_attr { void *qp_context; struct ibv_cq *send_cq; @@ -58,6 +61,23 @@ struct spdk_rdma_qp { struct spdk_rdma_send_wr_list send_wrs; }; +struct spdk_rdma_mem_map; + +union spdk_rdma_mr { /* Payload of a translation: a verbs Memory Region or a raw memory key */ + struct ibv_mr *mr; /* Used when translation_type is SPDK_RDMA_TRANSLATION_MR */ + uint64_t key; /* Used when translation_type is SPDK_RDMA_TRANSLATION_KEY */ +}; + +enum SPDK_RDMA_TRANSLATION_TYPE { /* Selects which member of union spdk_rdma_mr is meaningful */ + SPDK_RDMA_TRANSLATION_MR = 0, + SPDK_RDMA_TRANSLATION_KEY +}; + +struct spdk_rdma_memory_translation { + union spdk_rdma_mr mr_or_key; + uint8_t translation_type; /* One of enum SPDK_RDMA_TRANSLATION_TYPE */ +}; + /** * Create RDMA provider specific qpair * \param cm_id Pointer to RDMACM cm_id @@ -114,4 +134,66 @@ bool spdk_rdma_qp_queue_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_s */ int spdk_rdma_qp_flush_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr **bad_wr); +/** + * Create a memory map which is used to register Memory Regions and perform address -> memory + * key translations. Maps are reference counted: a map that already exists for \b pd is reused. + * + * \param pd Protection Domain which will be used to create Memory Regions + * \param hooks Optional hooks which are used to create Protection Domain or get RKey + * \return Pointer to memory map or NULL on failure + */ +struct spdk_rdma_mem_map *spdk_rdma_create_mem_map(struct ibv_pd *pd, + struct spdk_nvme_rdma_hooks *hooks); + +/** + * Release a reference to a previously created memory map; the map is destroyed with the last reference + * + * \param map Address of the memory map pointer to release; the pointer is reset to NULL + */ +void spdk_rdma_free_mem_map(struct spdk_rdma_mem_map **map); + +/** + * Get a translation for the given address and length. 
+ * + * Note: the user of this function should use address returned in \b translation structure + * + * \param map Pointer to translation map + * \param address Memory address for translation + * \param length Length of the memory buffer starting at \b address + * \param[in,out] translation Pointer to translation result to be filled by this function + * \retval -EINVAL if translation is not found (checked only when no get_rkey hook is in use) + * \retval -ERANGE if requested address + length crosses Memory Region boundary + * \retval 0 translation succeeded + */ +int spdk_rdma_get_translation(struct spdk_rdma_mem_map *map, void *address, + size_t length, struct spdk_rdma_memory_translation *translation); + +/** + * Helper function for retrieving Local Memory Key. Should be applied to a translation + * returned by \b spdk_rdma_get_translation + * + * \param translation Memory translation + * \return Local Memory Key (for key-type translations, the stored key cast to uint32_t) + */ +static inline uint32_t spdk_rdma_memory_translation_get_lkey(struct spdk_rdma_memory_translation + *translation) +{ + return translation->translation_type == SPDK_RDMA_TRANSLATION_MR ? + translation->mr_or_key.mr->lkey : (uint32_t)translation->mr_or_key.key; +} + +/** + * Helper function for retrieving Remote Memory Key. Should be applied to a translation + * returned by \b spdk_rdma_get_translation + * + * \param translation Memory translation + * \return Remote Memory Key (for key-type translations, the stored key cast to uint32_t) + */ +static inline uint32_t spdk_rdma_memory_translation_get_rkey(struct spdk_rdma_memory_translation + *translation) +{ + return translation->translation_type == SPDK_RDMA_TRANSLATION_MR ? + translation->mr_or_key.mr->rkey : (uint32_t)translation->mr_or_key.key; +} + #endif /* SPDK_RDMA_H */ diff --git a/lib/rdma/Makefile b/lib/rdma/Makefile index e6374557d..1403aff7b 100644 --- a/lib/rdma/Makefile +++ b/lib/rdma/Makefile @@ -35,16 +35,18 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk SO_VER := 1 -SO_MINOR := 0 +SO_MINOR := 1 SPDK_MAP_FILE = $(abspath $(CURDIR)/spdk_rdma.map) LIBNAME = rdma +C_SRCS = common.c + ifeq ($(CONFIG_RDMA_PROV),verbs) -C_SRCS = rdma_verbs.c +C_SRCS += rdma_verbs.c else ifeq ($(CONFIG_RDMA_PROV),mlx5_dv) -C_SRCS = rdma_mlx5_dv.c +C_SRCS += rdma_mlx5_dv.c LOCAL_SYS_LIBS += -lmlx5 else $(error Wrong RDMA provider specified: $(CONFIG_RDMA_PROV)) diff --git a/lib/rdma/common.c b/lib/rdma/common.c new file mode 100644 index 000000000..21bd51061 --- /dev/null +++ b/lib/rdma/common.c @@ -0,0 +1,222 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/log.h" +#include "spdk/env.h" +#include "spdk/likely.h" + +#include "spdk_internal/rdma.h" +#include "spdk_internal/assert.h" + +struct spdk_rdma_mem_map { + struct spdk_mem_map *map; /* SPDK memory map that stores the per-address translations */ + struct ibv_pd *pd; /* Protection domain; also the lookup key in g_rdma_mr_maps */ + struct spdk_nvme_rdma_hooks *hooks; /* Optional; with a get_rkey hook the translations are keys, not MRs */ + uint32_t ref_count; /* References handed out by spdk_rdma_create_mem_map() */ + LIST_ENTRY(spdk_rdma_mem_map) link; /* Linkage in g_rdma_mr_maps */ +}; + +static LIST_HEAD(, spdk_rdma_mem_map) g_rdma_mr_maps = LIST_HEAD_INITIALIZER(&g_rdma_mr_maps); /* Every live map; accessed in this file only with g_rdma_mr_maps_mutex held */ +static pthread_mutex_t g_rdma_mr_maps_mutex = PTHREAD_MUTEX_INITIALIZER; + +static int /* Notify callback wired into g_rdma_map_ops; cb_ctx is the owning struct spdk_rdma_mem_map */ +rdma_mem_notify(void *cb_ctx, struct spdk_mem_map *map, + enum spdk_mem_map_notify_action action, + void *vaddr, size_t size) +{ + struct spdk_rdma_mem_map *rmap = cb_ctx; + struct ibv_pd *pd = rmap->pd; + struct ibv_mr *mr; + int rc; + + switch (action) { + case SPDK_MEM_MAP_NOTIFY_REGISTER: + if (rmap->hooks && rmap->hooks->get_rkey) { /* Key path: store the value returned by the get_rkey hook as the translation */ + rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, rmap->hooks->get_rkey(pd, vaddr, + size)); + } else { /* MR path: register the range with verbs and store the MR pointer as the translation */ + mr = ibv_reg_mr(pd, vaddr, size, + IBV_ACCESS_LOCAL_WRITE | + IBV_ACCESS_REMOTE_READ | + IBV_ACCESS_REMOTE_WRITE); + if (mr == NULL) { + SPDK_ERRLOG("ibv_reg_mr() failed\n"); + return -1; + } else { + rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr); /* NOTE(review): mr is not deregistered if this call fails - confirm whether the MR can leak */ + } + } + break; + case SPDK_MEM_MAP_NOTIFY_UNREGISTER: + if (rmap->hooks == NULL || rmap->hooks->get_rkey == NULL) { /* MR path only: deregister the MR stored for vaddr before clearing the translation */ + mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL); + if 
(mr) { + ibv_dereg_mr(mr); + } + } + rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size); + break; + default: + SPDK_UNREACHABLE(); + } + + return rc; +} + +static int /* are_contiguous callback: compares two stored translation values for equality */ +rdma_check_contiguous_entries(uint64_t addr_1, uint64_t addr_2) +{ + /* Two contiguous mappings will point to the same address which is the start of the RDMA MR. */ + return addr_1 == addr_2; +} + +const struct spdk_mem_map_ops g_rdma_map_ops = { /* NOTE(review): external linkage, yet only referenced from this file as far as visible here - consider static */ + .notify_cb = rdma_mem_notify, + .are_contiguous = rdma_check_contiguous_entries +}; + +static void /* Releases the map structure itself with the deallocator that matches its allocator */ +_rdma_free_mem_map(struct spdk_rdma_mem_map *map) +{ + assert(map); + + if (map->hooks) { /* Mirrors spdk_rdma_create_mem_map(): spdk_zmalloc() when hooks were given, calloc() otherwise */ + spdk_free(map); + } else { + free(map); + } +} + +struct spdk_rdma_mem_map * +spdk_rdma_create_mem_map(struct ibv_pd *pd, struct spdk_nvme_rdma_hooks *hooks) +{ + struct spdk_rdma_mem_map *map; + + pthread_mutex_lock(&g_rdma_mr_maps_mutex); + /* Look up existing mem map registration for this pd */ + LIST_FOREACH(map, &g_rdma_mr_maps, link) { + if (map->pd == pd) { + map->ref_count++; /* Reuse the existing map; NOTE(review): the hooks argument is not compared or applied on this path */ + pthread_mutex_unlock(&g_rdma_mr_maps_mutex); + return map; + } + } + + if (hooks) { + map = spdk_zmalloc(sizeof(*map), 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); + } else { + map = calloc(1, sizeof(*map)); + } + if (!map) { + pthread_mutex_unlock(&g_rdma_mr_maps_mutex); + SPDK_ERRLOG("Memory allocation failed\n"); + return NULL; + } + map->pd = pd; + map->ref_count = 1; + map->hooks = hooks; + map->map = spdk_mem_map_alloc(0, &g_rdma_map_ops, map); /* First argument presumably is the default translation for unmapped addresses - TODO confirm against spdk/env.h */ + if (!map->map) { + SPDK_ERRLOG("Unable to create memory map\n"); + _rdma_free_mem_map(map); + pthread_mutex_unlock(&g_rdma_mr_maps_mutex); + return NULL; + } + LIST_INSERT_HEAD(&g_rdma_mr_maps, map, link); + + pthread_mutex_unlock(&g_rdma_mr_maps_mutex); + + return map; +} + +void +spdk_rdma_free_mem_map(struct spdk_rdma_mem_map **_map) +{ + struct spdk_rdma_mem_map *map; + + if (!_map) { + return; + } + + map = *_map; + if (!map) { + return; + } + *_map = NULL; /* The caller handle is cleared even when other references keep the map alive */ + + pthread_mutex_lock(&g_rdma_mr_maps_mutex); + 
assert(map->ref_count > 0); + map->ref_count--; + if (map->ref_count != 0) { /* Other holders remain: nothing to tear down */ + pthread_mutex_unlock(&g_rdma_mr_maps_mutex); + return; + } + + LIST_REMOVE(map, link); /* Last reference: unlink while locked, then free after the lock is released */ + pthread_mutex_unlock(&g_rdma_mr_maps_mutex); + if (map->map) { + spdk_mem_map_free(&map->map); + } + _rdma_free_mem_map(map); +} + +int +spdk_rdma_get_translation(struct spdk_rdma_mem_map *map, void *address, + size_t length, struct spdk_rdma_memory_translation *translation) +{ + uint64_t real_length = length; /* Passed by address to spdk_mem_map_translate() and compared with length afterwards */ + + assert(map); + assert(address); + assert(translation); + + if (map->hooks && map->hooks->get_rkey) { /* Key path; NOTE(review): unlike the MR path below, no not-found check is made here */ + translation->translation_type = SPDK_RDMA_TRANSLATION_KEY; + translation->mr_or_key.key = spdk_mem_map_translate(map->map, (uint64_t)address, &real_length); + } else { + translation->translation_type = SPDK_RDMA_TRANSLATION_MR; + translation->mr_or_key.mr = (struct ibv_mr *)spdk_mem_map_translate(map->map, (uint64_t)address, + &real_length); + if (spdk_unlikely(!translation->mr_or_key.mr)) { + SPDK_ERRLOG("No translation for ptr %p, size %zu\n", address, length); + return -EINVAL; + } + } + + if (spdk_unlikely(real_length < length)) { /* The translation found covers less than the requested length */ + SPDK_ERRLOG("Data buffer %p length %zu split over multiple RDMA Memory Regions\n", address, length); + return -ERANGE; + } + + return 0; +} diff --git a/lib/rdma/spdk_rdma.map b/lib/rdma/spdk_rdma.map index 9268a2191..a39c8dbf2 100644 --- a/lib/rdma/spdk_rdma.map +++ b/lib/rdma/spdk_rdma.map @@ -9,6 +9,9 @@ spdk_rdma_qp_disconnect; spdk_rdma_qp_queue_send_wrs; spdk_rdma_qp_flush_send_wrs; + spdk_rdma_create_mem_map; + spdk_rdma_free_mem_map; + spdk_rdma_get_translation; local: *; };