From d648dde682914f61b5cf12d47b4484368e81f00c Mon Sep 17 00:00:00 2001 From: Ziye Yang Date: Thu, 16 Jan 2020 00:51:04 +0800 Subject: [PATCH] lib/iscsi: Use asychronized writev for sending data on sockets This patch eliminates the flushing logic and simplies the writev logic. And this patch can also improve the performance. We support async write for PDUs other than login response, logout response, and text response in this patch. We will support async write also for them later in this patch series. Signed-off-by: Ziye Yang Change-Id: I243f598f297d594da0bb18466bc47dab918ed3ee Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/481686 Reviewed-by: Shuhei Matsumoto Reviewed-by: Ben Walker Reviewed-by: Jim Harris Community-CI: SPDK CI Jenkins Tested-by: SPDK CI Jenkins --- lib/iscsi/conn.c | 240 +++++++-------------------- lib/iscsi/conn.h | 1 - lib/iscsi/iscsi.h | 17 ++ test/unit/lib/iscsi/conn.c/conn_ut.c | 46 ----- 4 files changed, 75 insertions(+), 229 deletions(-) diff --git a/lib/iscsi/conn.c b/lib/iscsi/conn.c index 8c8bdca2a..7c727ddbb 100644 --- a/lib/iscsi/conn.c +++ b/lib/iscsi/conn.c @@ -193,8 +193,6 @@ iscsi_poll_group_remove_conn(struct spdk_iscsi_poll_group *pg, struct spdk_iscsi SPDK_ERRLOG("Failed to remove sock=%p of conn=%p\n", conn->sock, conn); } - spdk_poller_unregister(&conn->flush_poller); - conn->is_stopped = true; STAILQ_REMOVE(&pg->connections, conn, spdk_iscsi_conn, link); } @@ -1380,36 +1378,6 @@ spdk_iscsi_conn_readv_data(struct spdk_iscsi_conn *conn, return SPDK_ISCSI_CONNECTION_FATAL; } -static int -iscsi_get_pdu_length(struct spdk_iscsi_pdu *pdu, int header_digest, - int data_digest) -{ - int data_len, enable_digest, total; - - enable_digest = 1; - if (pdu->bhs.opcode == ISCSI_OP_LOGIN_RSP) { - enable_digest = 0; - } - - total = ISCSI_BHS_LEN; - - total += (4 * pdu->bhs.total_ahs_len); - - if (enable_digest && header_digest) { - total += ISCSI_DIGEST_LEN; - } - - data_len = DGET24(pdu->bhs.data_segment_len); - if (data_len > 0) { - total += ISCSI_ALIGN(data_len); - if (enable_digest && data_digest) { - total += ISCSI_DIGEST_LEN; - } - } - - return total; -} - static bool iscsi_is_free_pdu_deferred(struct spdk_iscsi_pdu *pdu) { @@ -1425,154 +1393,6 @@ iscsi_is_free_pdu_deferred(struct spdk_iscsi_pdu *pdu) return false; } -/** - * \brief Makes one attempt to flush response PDUs back to the initiator. - * - * Builds a list of iovecs for response PDUs that must be sent back to the - * initiator and passes it to writev(). - * - * Since the socket is non-blocking, writev() may not be able to flush all - * of the iovecs, and may even partially flush one of the iovecs. In this - * case, the partially flushed PDU will remain on the write_pdu_list with - * an offset pointing to the next byte to be flushed. - * - * Returns 0 if all PDUs were flushed. - * - * Returns 1 if some PDUs could not be flushed due to lack of send buffer - * space. - * - * Returns -1 if an exception error occurred indicating the TCP connection - * should be closed. - */ -static int -iscsi_conn_flush_pdus_internal(struct spdk_iscsi_conn *conn) -{ - const int num_iovs = 32; - struct iovec iovs[num_iovs]; - struct iovec *iov = iovs; - int iovcnt = 0; - int bytes = 0; - uint32_t total_length = 0; - uint32_t mapped_length = 0; - struct spdk_iscsi_pdu *pdu; - int pdu_length; - TAILQ_HEAD(, spdk_iscsi_pdu) completed_pdus_list; - - pdu = TAILQ_FIRST(&conn->write_pdu_list); - - if (pdu == NULL) { - return 0; - } - - /* - * Build up a list of iovecs for the first few PDUs in the - * connection's write_pdu_list. For the first PDU, check if it was - * partially written out the last time this function was called, and - * if so adjust the iovec array accordingly. This check is done in - * spdk_iscsi_build_iovs() and so applied to remaining PDUs too. - * But extra overhead is negligible. - */ - while (pdu != NULL && ((num_iovs - iovcnt) > 0)) { - iovcnt += spdk_iscsi_build_iovs(conn, &iovs[iovcnt], num_iovs - iovcnt, - pdu, &mapped_length); - total_length += mapped_length; - pdu = TAILQ_NEXT(pdu, tailq); - } - - spdk_trace_record(TRACE_ISCSI_FLUSH_WRITEBUF_START, conn->id, total_length, 0, iovcnt); - - bytes = spdk_sock_writev(conn->sock, iov, iovcnt); - if (bytes == -1) { - if (errno == EWOULDBLOCK || errno == EAGAIN) { - return 1; - } else { - SPDK_ERRLOG("spdk_sock_writev() failed, errno %d: %s\n", - errno, spdk_strerror(errno)); - return -1; - } - } - - spdk_trace_record(TRACE_ISCSI_FLUSH_WRITEBUF_DONE, conn->id, bytes, 0, 0); - - pdu = TAILQ_FIRST(&conn->write_pdu_list); - - /* - * Free any PDUs that were fully written. If a PDU was only - * partially written, update its writev_offset so that next - * time only the unwritten portion will be sent to writev(). - */ - TAILQ_INIT(&completed_pdus_list); - while (bytes > 0) { - pdu_length = iscsi_get_pdu_length(pdu, conn->header_digest, - conn->data_digest); - pdu_length -= pdu->writev_offset; - - if (bytes >= pdu_length) { - bytes -= pdu_length; - TAILQ_REMOVE(&conn->write_pdu_list, pdu, tailq); - TAILQ_INSERT_TAIL(&completed_pdus_list, pdu, tailq); - pdu = TAILQ_FIRST(&conn->write_pdu_list); - } else { - pdu->writev_offset += bytes; - bytes = 0; - } - } - - while (!TAILQ_EMPTY(&completed_pdus_list)) { - pdu = TAILQ_FIRST(&completed_pdus_list); - TAILQ_REMOVE(&completed_pdus_list, pdu, tailq); - if ((conn->full_feature) && - (conn->sess->ErrorRecoveryLevel >= 1) && - iscsi_is_free_pdu_deferred(pdu)) { - SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "stat_sn=%d\n", - from_be32(&pdu->bhs.stat_sn)); - TAILQ_INSERT_TAIL(&conn->snack_pdu_list, pdu, - tailq); - } else { - spdk_iscsi_conn_free_pdu(conn, pdu); - } - } - - return TAILQ_EMPTY(&conn->write_pdu_list) ? 0 : 1; -} - -/** - * \brief Flushes response PDUs back to the initiator. - * - * This function may return without all PDUs having flushed to the - * underlying TCP socket buffer - for example, in the case where the - * socket buffer is already full. - * - * If not all PDUs are flushed, then subsequent calls to this routine - * will eventually flush remaining PDUs. - * - * PDUs are flushed only during normal RUNNING connection state. - */ -static int -iscsi_conn_flush_pdus(void *_conn) -{ - struct spdk_iscsi_conn *conn = _conn; - int rc; - - if (spdk_unlikely(conn->state > ISCSI_CONN_STATE_RUNNING)) { - return 1; - } - - rc = iscsi_conn_flush_pdus_internal(conn); - if (rc == 0 && conn->flush_poller != NULL) { - spdk_poller_unregister(&conn->flush_poller); - } else if (rc == 1 && conn->flush_poller == NULL) { - conn->flush_poller = spdk_poller_register(iscsi_conn_flush_pdus, - conn, 50); - } - - if (rc < 0) { - conn->state = ISCSI_CONN_STATE_EXITING; - } - - return 1; -} - static int iscsi_dif_verify(struct spdk_iscsi_pdu *pdu, struct spdk_dif_ctx *dif_ctx) { @@ -1594,11 +1414,44 @@ iscsi_dif_verify(struct spdk_iscsi_pdu *pdu, struct spdk_dif_ctx *dif_ctx) return rc; } +static void +_iscsi_conn_pdu_write_done(void *cb_arg, int err) +{ + struct spdk_iscsi_pdu *pdu = cb_arg; + struct spdk_iscsi_conn *conn = pdu->conn; + + assert(conn != NULL); + + if (spdk_unlikely(conn->state >= ISCSI_CONN_STATE_EXITING)) { + /* The other policy will recycle the resource */ + return; + } + + TAILQ_REMOVE(&conn->write_pdu_list, pdu, tailq); + + if (err != 0) { + conn->state = ISCSI_CONN_STATE_EXITING; + } else { + spdk_trace_record(TRACE_ISCSI_FLUSH_WRITEBUF_DONE, conn->id, pdu->mapped_length, (uintptr_t)pdu, 0); + } + + if ((conn->full_feature) && + (conn->sess->ErrorRecoveryLevel >= 1) && + iscsi_is_free_pdu_deferred(pdu)) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "stat_sn=%d\n", + from_be32(&pdu->bhs.stat_sn)); + TAILQ_INSERT_TAIL(&conn->snack_pdu_list, pdu, + tailq); + } else { + spdk_iscsi_conn_free_pdu(conn, pdu); + } +} + void spdk_iscsi_conn_write_pdu(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu) { uint32_t crc32c; - int rc; + ssize_t rc; if (spdk_unlikely(pdu->dif_insert_or_strip)) { rc = iscsi_dif_verify(pdu, &pdu->dif_ctx); @@ -1624,7 +1477,30 @@ spdk_iscsi_conn_write_pdu(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *p } TAILQ_INSERT_TAIL(&conn->write_pdu_list, pdu, tailq); - iscsi_conn_flush_pdus(conn); + + if (spdk_unlikely(conn->state >= ISCSI_CONN_STATE_EXITING)) { + return; + } + pdu->sock_req.iovcnt = spdk_iscsi_build_iovs(conn, pdu->iov, SPDK_COUNTOF(pdu->iov), pdu, + &pdu->mapped_length); + pdu->sock_req.cb_fn = _iscsi_conn_pdu_write_done; + pdu->sock_req.cb_arg = pdu; + + spdk_trace_record(TRACE_ISCSI_FLUSH_WRITEBUF_START, conn->id, pdu->mapped_length, (uintptr_t)pdu, + pdu->sock_req.iovcnt); + if (spdk_unlikely((pdu->bhs.opcode == ISCSI_OP_LOGIN_RSP) || + (pdu->bhs.opcode == ISCSI_OP_LOGOUT_RSP) || + (pdu->bhs.opcode == ISCSI_OP_TEXT_RSP))) { + rc = spdk_sock_writev(conn->sock, pdu->iov, pdu->sock_req.iovcnt); + if (rc == pdu->mapped_length) { + _iscsi_conn_pdu_write_done(pdu, 0); + } else { + SPDK_ERRLOG("Login RSP or Logout RSP could not write to socket.\n"); + _iscsi_conn_pdu_write_done(pdu, -1); + } + } else { + spdk_sock_writev_async(conn->sock, &pdu->sock_req); + } } static void diff --git a/lib/iscsi/conn.h b/lib/iscsi/conn.h index 53f9b8f34..dd7a10add 100644 --- a/lib/iscsi/conn.h +++ b/lib/iscsi/conn.h @@ -190,7 +190,6 @@ struct spdk_iscsi_conn { char *partial_text_parameter; STAILQ_ENTRY(spdk_iscsi_conn) link; - struct spdk_poller *flush_poller; bool is_stopped; /* Set true when connection is stopped for migration */ TAILQ_HEAD(queued_r2t_tasks, spdk_iscsi_task) queued_r2t_tasks; TAILQ_HEAD(active_r2t_tasks, spdk_iscsi_task) active_r2t_tasks; diff --git a/lib/iscsi/iscsi.h b/lib/iscsi/iscsi.h index 7cf4eb27e..101be4829 100644 --- a/lib/iscsi/iscsi.h +++ b/lib/iscsi/iscsi.h @@ -41,6 +41,7 @@ #include "spdk/iscsi_spec.h" #include "spdk/event.h" #include "spdk/thread.h" +#include "spdk/sock.h" #include "spdk/scsi.h" #include "iscsi/param.h" @@ -161,6 +162,12 @@ struct spdk_mobj { void *buf; }; +/* + * Maximum number of SGL elements, i.e., + * BHS, AHS, Header Digest, Data Segment and Data Digest. + */ +#define SPDK_ISCSI_MAX_SGL_DESCRIPTORS (5) + struct spdk_iscsi_pdu { struct iscsi_bhs bhs; struct spdk_mobj *mobj; @@ -184,6 +191,13 @@ struct spdk_iscsi_pdu { bool dif_insert_or_strip; struct spdk_dif_ctx dif_ctx; struct spdk_iscsi_conn *conn; + + /* The sock request ends with a 0 length iovec. Place the actual iovec immediately + * after it. There is a static assert below to check if the compiler inserted + * any unwanted padding */ + int32_t mapped_length; + struct spdk_sock_request sock_req; + struct iovec iov[SPDK_ISCSI_MAX_SGL_DESCRIPTORS]; TAILQ_ENTRY(spdk_iscsi_pdu) tailq; @@ -199,6 +213,9 @@ struct spdk_iscsi_pdu { uint8_t data[32]; } sense; }; +SPDK_STATIC_ASSERT(offsetof(struct spdk_iscsi_pdu, + sock_req) + sizeof(struct spdk_sock_request) == offsetof(struct spdk_iscsi_pdu, iov), + "Compiler inserted padding between iov and sock_req"); enum iscsi_connection_state { ISCSI_CONN_STATE_INVALID = 0, diff --git a/test/unit/lib/iscsi/conn.c/conn_ut.c b/test/unit/lib/iscsi/conn.c/conn_ut.c index eb83621a8..b076c084d 100644 --- a/test/unit/lib/iscsi/conn.c/conn_ut.c +++ b/test/unit/lib/iscsi/conn.c/conn_ut.c @@ -510,51 +510,6 @@ process_non_read_task_completion_test(void) CU_ASSERT(primary.scsi.ref == 0); } -static void -recursive_flush_pdus_calls(void) -{ - struct spdk_iscsi_pdu pdu1 = {}, pdu2 = {}, pdu3 = {}; - struct spdk_iscsi_task task1 = {}, task2 = {}, task3 = {}; - struct spdk_iscsi_conn conn = {}; - int rc; - - TAILQ_INIT(&conn.write_pdu_list); - conn.data_in_cnt = 3; - - task1.scsi.ref = 1; - task2.scsi.ref = 1; - task3.scsi.ref = 1; - - task1.scsi.offset = 512; - task2.scsi.offset = 512 * 2; - task3.scsi.offset = 512 * 3; - - pdu1.task = &task1; - pdu2.task = &task2; - pdu3.task = &task3; - - pdu1.bhs.opcode = ISCSI_OP_SCSI_DATAIN; - pdu2.bhs.opcode = ISCSI_OP_SCSI_DATAIN; - pdu3.bhs.opcode = ISCSI_OP_SCSI_DATAIN; - - DSET24(&pdu1.bhs.data_segment_len, 512); - DSET24(&pdu2.bhs.data_segment_len, 512); - DSET24(&pdu3.bhs.data_segment_len, 512); - - TAILQ_INSERT_TAIL(&conn.write_pdu_list, &pdu1, tailq); - TAILQ_INSERT_TAIL(&conn.write_pdu_list, &pdu2, tailq); - TAILQ_INSERT_TAIL(&conn.write_pdu_list, &pdu3, tailq); - - g_sock_writev_bytes = (512 + ISCSI_BHS_LEN) * 3; - - rc = iscsi_conn_flush_pdus_internal(&conn); - CU_ASSERT(rc == 0); - - CU_ASSERT(task1.scsi.ref == 0); - CU_ASSERT(task2.scsi.ref == 0); - CU_ASSERT(task3.scsi.ref == 0); -} - static bool dequeue_pdu(void *_head, struct spdk_iscsi_pdu *pdu) { @@ -935,7 +890,6 @@ main(int argc, char **argv) propagate_scsi_error_status_for_split_read_tasks) == NULL || CU_add_test(suite, "process_non_read_task_completion_test", process_non_read_task_completion_test) == NULL || - CU_add_test(suite, "recursive_flush_pdus_calls", recursive_flush_pdus_calls) == NULL || CU_add_test(suite, "free_tasks_on_connection", free_tasks_on_connection) == NULL || CU_add_test(suite, "free_tasks_with_queued_datain", free_tasks_with_queued_datain) == NULL || CU_add_test(suite, "abort_queued_datain_task_test", abort_queued_datain_task_test) == NULL ||