spdk/lib/nvmf/ctrlr_discovery.c

/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
* Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* NVMe over Fabrics discovery service
*/
#include "spdk/stdinc.h"
#include "nvmf_internal.h"
#include "transport.h"
#include "spdk/string.h"
#include "spdk/trace.h"
#include "spdk/nvmf_spec.h"
#include "spdk_internal/assert.h"
#include "spdk/log.h"
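
/*
 * Bump the target's discovery generation counter and notify each discovery
 * controller (optionally restricted to the given hostnqn) that its discovery
 * log page has changed.
 */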
void
nvmf_update_discovery_log(struct spdk_nvmf_tgt *tgt, const char *hostnqn)
{
	struct spdk_nvmf_subsystem *discovery_subsystem;
	struct spdk_nvmf_ctrlr *ctrlr;

	tgt->discovery_genctr++;

	discovery_subsystem = spdk_nvmf_tgt_find_subsystem(tgt, SPDK_NVMF_DISCOVERY_NQN);

	if (discovery_subsystem) {
		/** There is a change in discovery log for hosts with given hostnqn */
		TAILQ_FOREACH(ctrlr, &discovery_subsystem->ctrlrs, link) {
			if (hostnqn == NULL || strcmp(hostnqn, ctrlr->hostnqn) == 0) {
				spdk_thread_send_msg(ctrlr->thread, nvmf_ctrlr_async_event_discovery_log_change_notice, ctrlr);
			}
		}
	}
}
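
/*
 * Transport type comparison used by the discovery filter: custom transports
 * are matched by transport string (case-insensitive), all others by the
 * trtype enum value.
 */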
static bool
nvmf_discovery_compare_trtype(const struct spdk_nvme_transport_id *trid1,
			      const struct spdk_nvme_transport_id *trid2)
{
	if (trid1->trtype == SPDK_NVME_TRANSPORT_CUSTOM) {
		return strcasecmp(trid1->trstring, trid2->trstring) == 0;
	} else {
		return trid1->trtype == trid2->trtype;
	}
}

static bool
nvmf_discovery_compare_tr_addr(const struct spdk_nvme_transport_id *trid1,
			       const struct spdk_nvme_transport_id *trid2)
{
	return trid1->adrfam == trid2->adrfam && strcasecmp(trid1->traddr, trid2->traddr) == 0;
}

static bool
nvmf_discovery_compare_tr_svcid(const struct spdk_nvme_transport_id *trid1,
				const struct spdk_nvme_transport_id *trid2)
{
	return strcasecmp(trid1->trsvcid, trid2->trsvcid) == 0;
}
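
/*
 * Build a freshly allocated discovery log page for the given host, with one
 * entry per listener of each active subsystem the host is allowed to access,
 * subject to the checks below. The caller owns the returned buffer and must
 * free() it.
 */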
static struct spdk_nvmf_discovery_log_page *
nvmf_generate_discovery_log(struct spdk_nvmf_tgt *tgt, const char *hostnqn, size_t *log_page_size,
			    struct spdk_nvme_transport_id *cmd_source_trid)
{
	uint64_t numrec = 0;
	struct spdk_nvmf_subsystem *subsystem;
	struct spdk_nvmf_subsystem_listener *listener;
	struct spdk_nvmf_discovery_log_page_entry *entry;
	struct spdk_nvmf_discovery_log_page *disc_log;
	size_t cur_size;
	uint32_t sid;

	SPDK_DEBUGLOG(nvmf, "Generating log page for genctr %" PRIu64 "\n",
		      tgt->discovery_genctr);

	cur_size = sizeof(struct spdk_nvmf_discovery_log_page);
	disc_log = calloc(1, cur_size);
	if (disc_log == NULL) {
		SPDK_ERRLOG("Discovery log page memory allocation error\n");
		return NULL;
	}

	for (sid = 0; sid < tgt->max_subsystems; sid++) {
		subsystem = tgt->subsystems[sid];
		if ((subsystem == NULL) ||
		    (subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE) ||
		    (subsystem->state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING)) {
			continue;
		}

		if (!spdk_nvmf_subsystem_host_allowed(subsystem, hostnqn)) {
			continue;
		}

		for (listener = spdk_nvmf_subsystem_get_first_listener(subsystem); listener != NULL;
		     listener = spdk_nvmf_subsystem_get_next_listener(subsystem, listener)) {
			if (subsystem->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
				struct spdk_nvme_transport_id source_trid = *cmd_source_trid;
				struct spdk_nvme_transport_id listener_trid = *listener->trid;

				/* Do not generate an entry for the transport ID for the listener
				 * entry associated with the discovery controller that generated
				 * this command. We compare a copy of the trids, since the trids
				 * here don't contain the subnqn, and the transport_id_compare()
				 * function will compare the subnqns.
				 */
				source_trid.subnqn[0] = '\0';
				listener_trid.subnqn[0] = '\0';
				if (!spdk_nvme_transport_id_compare(&listener_trid, &source_trid)) {
					continue;
				}
			}
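
			/*
			 * Optional discovery filter: when enabled on the target, only report
			 * listeners whose transport type, address and/or service ID match the
			 * transport over which this discovery command arrived.
			 */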
			if ((tgt->discovery_filter & SPDK_NVMF_TGT_DISCOVERY_MATCH_TRANSPORT_TYPE) != 0 &&
			    !nvmf_discovery_compare_trtype(listener->trid, cmd_source_trid)) {
				SPDK_DEBUGLOG(nvmf, "ignore listener type %d (%s) due to type mismatch\n",
					      listener->trid->trtype, listener->trid->trstring);
				continue;
			}

			if ((tgt->discovery_filter & SPDK_NVMF_TGT_DISCOVERY_MATCH_TRANSPORT_ADDRESS) != 0 &&
			    !nvmf_discovery_compare_tr_addr(listener->trid, cmd_source_trid)) {
				SPDK_DEBUGLOG(nvmf, "ignore listener addr %s due to addr mismatch\n",
					      listener->trid->traddr);
				continue;
			}

			if ((tgt->discovery_filter & SPDK_NVMF_TGT_DISCOVERY_MATCH_TRANSPORT_SVCID) != 0 &&
			    !nvmf_discovery_compare_tr_svcid(listener->trid, cmd_source_trid)) {
				SPDK_DEBUGLOG(nvmf, "ignore listener svcid %s due to svcid mismatch\n",
					      listener->trid->trsvcid);
				continue;
			}

			SPDK_DEBUGLOG(nvmf, "listener %s:%s trtype %s\n", listener->trid->traddr, listener->trid->trsvcid,
				      listener->trid->trstring);
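
			/* Grow the log page by one entry and fill it in for this listener. */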
			size_t new_size = cur_size + sizeof(*entry);
			void *new_log_page = realloc(disc_log, new_size);
			if (new_log_page == NULL) {
				SPDK_ERRLOG("Discovery log page memory allocation error\n");
				break;
			}
			disc_log = new_log_page;
			cur_size = new_size;

			entry = &disc_log->entries[numrec];
			memset(entry, 0, sizeof(*entry));
			entry->portid = listener->id;
			entry->cntlid = 0xffff;
			entry->asqsz = listener->transport->opts.max_aq_depth;
			entry->subtype = subsystem->subtype;
			snprintf(entry->subnqn, sizeof(entry->subnqn), "%s", subsystem->subnqn);

			nvmf_transport_listener_discover(listener->transport, listener->trid, entry);

			numrec++;
		}
	}

	disc_log->numrec = numrec;
	disc_log->genctr = tgt->discovery_genctr;
	*log_page_size = cur_size;

	return disc_log;
}
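
/*
 * Generate a discovery log page for this host and copy the requested
 * [offset, offset + length) range of it into the supplied iovecs,
 * zero-filling any remaining payload beyond the valid log data.
 */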
void
nvmf_get_discovery_log_page(struct spdk_nvmf_tgt *tgt, const char *hostnqn, struct iovec *iov,
			    uint32_t iovcnt, uint64_t offset, uint32_t length,
			    struct spdk_nvme_transport_id *cmd_source_trid)
{
	size_t copy_len = 0;
	size_t zero_len = 0;
	struct iovec *tmp;
	size_t log_page_size = 0;
	struct spdk_nvmf_discovery_log_page *discovery_log_page;

	discovery_log_page = nvmf_generate_discovery_log(tgt, hostnqn, &log_page_size, cmd_source_trid);

	/* Copy the valid part of the discovery log page, if any */
	if (discovery_log_page) {
		for (tmp = iov; tmp < iov + iovcnt; tmp++) {
			copy_len = spdk_min(tmp->iov_len, length);
			copy_len = spdk_min(log_page_size - offset, copy_len);
			memcpy(tmp->iov_base, (char *)discovery_log_page + offset, copy_len);

			offset += copy_len;
			length -= copy_len;
			zero_len = tmp->iov_len - copy_len;
			if (log_page_size <= offset || length == 0) {
				break;
			}
		}

		/* Zero out the rest of the payload */
		if (zero_len) {
			memset((char *)tmp->iov_base + copy_len, 0, zero_len);
		}

		for (++tmp; tmp < iov + iovcnt; tmp++) {
			memset((char *)tmp->iov_base, 0, tmp->iov_len);
		}

		free(discovery_log_page);
	}
}