diff --git a/lib/iscsi/conn.c b/lib/iscsi/conn.c index 8c8bdca2a..7c727ddbb 100644 --- a/lib/iscsi/conn.c +++ b/lib/iscsi/conn.c @@ -193,8 +193,6 @@ iscsi_poll_group_remove_conn(struct spdk_iscsi_poll_group *pg, struct spdk_iscsi SPDK_ERRLOG("Failed to remove sock=%p of conn=%p\n", conn->sock, conn); } - spdk_poller_unregister(&conn->flush_poller); - conn->is_stopped = true; STAILQ_REMOVE(&pg->connections, conn, spdk_iscsi_conn, link); } @@ -1380,36 +1378,6 @@ spdk_iscsi_conn_readv_data(struct spdk_iscsi_conn *conn, return SPDK_ISCSI_CONNECTION_FATAL; } -static int -iscsi_get_pdu_length(struct spdk_iscsi_pdu *pdu, int header_digest, - int data_digest) -{ - int data_len, enable_digest, total; - - enable_digest = 1; - if (pdu->bhs.opcode == ISCSI_OP_LOGIN_RSP) { - enable_digest = 0; - } - - total = ISCSI_BHS_LEN; - - total += (4 * pdu->bhs.total_ahs_len); - - if (enable_digest && header_digest) { - total += ISCSI_DIGEST_LEN; - } - - data_len = DGET24(pdu->bhs.data_segment_len); - if (data_len > 0) { - total += ISCSI_ALIGN(data_len); - if (enable_digest && data_digest) { - total += ISCSI_DIGEST_LEN; - } - } - - return total; -} - static bool iscsi_is_free_pdu_deferred(struct spdk_iscsi_pdu *pdu) { @@ -1425,154 +1393,6 @@ iscsi_is_free_pdu_deferred(struct spdk_iscsi_pdu *pdu) return false; } -/** - * \brief Makes one attempt to flush response PDUs back to the initiator. - * - * Builds a list of iovecs for response PDUs that must be sent back to the - * initiator and passes it to writev(). - * - * Since the socket is non-blocking, writev() may not be able to flush all - * of the iovecs, and may even partially flush one of the iovecs. In this - * case, the partially flushed PDU will remain on the write_pdu_list with - * an offset pointing to the next byte to be flushed. - * - * Returns 0 if all PDUs were flushed. - * - * Returns 1 if some PDUs could not be flushed due to lack of send buffer - * space. - * - * Returns -1 if an exception error occurred indicating the TCP connection - * should be closed. - */ -static int -iscsi_conn_flush_pdus_internal(struct spdk_iscsi_conn *conn) -{ - const int num_iovs = 32; - struct iovec iovs[num_iovs]; - struct iovec *iov = iovs; - int iovcnt = 0; - int bytes = 0; - uint32_t total_length = 0; - uint32_t mapped_length = 0; - struct spdk_iscsi_pdu *pdu; - int pdu_length; - TAILQ_HEAD(, spdk_iscsi_pdu) completed_pdus_list; - - pdu = TAILQ_FIRST(&conn->write_pdu_list); - - if (pdu == NULL) { - return 0; - } - - /* - * Build up a list of iovecs for the first few PDUs in the - * connection's write_pdu_list. For the first PDU, check if it was - * partially written out the last time this function was called, and - * if so adjust the iovec array accordingly. This check is done in - * spdk_iscsi_build_iovs() and so applied to remaining PDUs too. - * But extra overhead is negligible. - */ - while (pdu != NULL && ((num_iovs - iovcnt) > 0)) { - iovcnt += spdk_iscsi_build_iovs(conn, &iovs[iovcnt], num_iovs - iovcnt, - pdu, &mapped_length); - total_length += mapped_length; - pdu = TAILQ_NEXT(pdu, tailq); - } - - spdk_trace_record(TRACE_ISCSI_FLUSH_WRITEBUF_START, conn->id, total_length, 0, iovcnt); - - bytes = spdk_sock_writev(conn->sock, iov, iovcnt); - if (bytes == -1) { - if (errno == EWOULDBLOCK || errno == EAGAIN) { - return 1; - } else { - SPDK_ERRLOG("spdk_sock_writev() failed, errno %d: %s\n", - errno, spdk_strerror(errno)); - return -1; - } - } - - spdk_trace_record(TRACE_ISCSI_FLUSH_WRITEBUF_DONE, conn->id, bytes, 0, 0); - - pdu = TAILQ_FIRST(&conn->write_pdu_list); - - /* - * Free any PDUs that were fully written. If a PDU was only - * partially written, update its writev_offset so that next - * time only the unwritten portion will be sent to writev(). - */ - TAILQ_INIT(&completed_pdus_list); - while (bytes > 0) { - pdu_length = iscsi_get_pdu_length(pdu, conn->header_digest, - conn->data_digest); - pdu_length -= pdu->writev_offset; - - if (bytes >= pdu_length) { - bytes -= pdu_length; - TAILQ_REMOVE(&conn->write_pdu_list, pdu, tailq); - TAILQ_INSERT_TAIL(&completed_pdus_list, pdu, tailq); - pdu = TAILQ_FIRST(&conn->write_pdu_list); - } else { - pdu->writev_offset += bytes; - bytes = 0; - } - } - - while (!TAILQ_EMPTY(&completed_pdus_list)) { - pdu = TAILQ_FIRST(&completed_pdus_list); - TAILQ_REMOVE(&completed_pdus_list, pdu, tailq); - if ((conn->full_feature) && - (conn->sess->ErrorRecoveryLevel >= 1) && - iscsi_is_free_pdu_deferred(pdu)) { - SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "stat_sn=%d\n", - from_be32(&pdu->bhs.stat_sn)); - TAILQ_INSERT_TAIL(&conn->snack_pdu_list, pdu, - tailq); - } else { - spdk_iscsi_conn_free_pdu(conn, pdu); - } - } - - return TAILQ_EMPTY(&conn->write_pdu_list) ? 0 : 1; -} - -/** - * \brief Flushes response PDUs back to the initiator. - * - * This function may return without all PDUs having flushed to the - * underlying TCP socket buffer - for example, in the case where the - * socket buffer is already full. - * - * If not all PDUs are flushed, then subsequent calls to this routine - * will eventually flush remaining PDUs. - * - * PDUs are flushed only during normal RUNNING connection state. - */ -static int -iscsi_conn_flush_pdus(void *_conn) -{ - struct spdk_iscsi_conn *conn = _conn; - int rc; - - if (spdk_unlikely(conn->state > ISCSI_CONN_STATE_RUNNING)) { - return 1; - } - - rc = iscsi_conn_flush_pdus_internal(conn); - if (rc == 0 && conn->flush_poller != NULL) { - spdk_poller_unregister(&conn->flush_poller); - } else if (rc == 1 && conn->flush_poller == NULL) { - conn->flush_poller = spdk_poller_register(iscsi_conn_flush_pdus, - conn, 50); - } - - if (rc < 0) { - conn->state = ISCSI_CONN_STATE_EXITING; - } - - return 1; -} - static int iscsi_dif_verify(struct spdk_iscsi_pdu *pdu, struct spdk_dif_ctx *dif_ctx) { @@ -1594,11 +1414,44 @@ iscsi_dif_verify(struct spdk_iscsi_pdu *pdu, struct spdk_dif_ctx *dif_ctx) return rc; } +static void +_iscsi_conn_pdu_write_done(void *cb_arg, int err) +{ + struct spdk_iscsi_pdu *pdu = cb_arg; + struct spdk_iscsi_conn *conn = pdu->conn; + + assert(conn != NULL); + + if (spdk_unlikely(conn->state >= ISCSI_CONN_STATE_EXITING)) { + /* The other policy will recycle the resource */ + return; + } + + TAILQ_REMOVE(&conn->write_pdu_list, pdu, tailq); + + if (err != 0) { + conn->state = ISCSI_CONN_STATE_EXITING; + } else { + spdk_trace_record(TRACE_ISCSI_FLUSH_WRITEBUF_DONE, conn->id, pdu->mapped_length, (uintptr_t)pdu, 0); + } + + if ((conn->full_feature) && + (conn->sess->ErrorRecoveryLevel >= 1) && + iscsi_is_free_pdu_deferred(pdu)) { + SPDK_DEBUGLOG(SPDK_LOG_ISCSI, "stat_sn=%d\n", + from_be32(&pdu->bhs.stat_sn)); + TAILQ_INSERT_TAIL(&conn->snack_pdu_list, pdu, + tailq); + } else { + spdk_iscsi_conn_free_pdu(conn, pdu); + } +} + void spdk_iscsi_conn_write_pdu(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *pdu) { uint32_t crc32c; - int rc; + ssize_t rc; if (spdk_unlikely(pdu->dif_insert_or_strip)) { rc = iscsi_dif_verify(pdu, &pdu->dif_ctx); @@ -1624,7 +1477,30 @@ spdk_iscsi_conn_write_pdu(struct spdk_iscsi_conn *conn, struct spdk_iscsi_pdu *p } TAILQ_INSERT_TAIL(&conn->write_pdu_list, pdu, tailq); - iscsi_conn_flush_pdus(conn); + + if (spdk_unlikely(conn->state >= ISCSI_CONN_STATE_EXITING)) { + return; + } + pdu->sock_req.iovcnt = spdk_iscsi_build_iovs(conn, pdu->iov, SPDK_COUNTOF(pdu->iov), pdu, + &pdu->mapped_length); + pdu->sock_req.cb_fn = _iscsi_conn_pdu_write_done; + pdu->sock_req.cb_arg = pdu; + + spdk_trace_record(TRACE_ISCSI_FLUSH_WRITEBUF_START, conn->id, pdu->mapped_length, (uintptr_t)pdu, + pdu->sock_req.iovcnt); + if (spdk_unlikely((pdu->bhs.opcode == ISCSI_OP_LOGIN_RSP) || + (pdu->bhs.opcode == ISCSI_OP_LOGOUT_RSP) || + (pdu->bhs.opcode == ISCSI_OP_TEXT_RSP))) { + rc = spdk_sock_writev(conn->sock, pdu->iov, pdu->sock_req.iovcnt); + if (rc == pdu->mapped_length) { + _iscsi_conn_pdu_write_done(pdu, 0); + } else { + SPDK_ERRLOG("Login RSP or Logout RSP could not write to socket.\n"); + _iscsi_conn_pdu_write_done(pdu, -1); + } + } else { + spdk_sock_writev_async(conn->sock, &pdu->sock_req); + } } static void diff --git a/lib/iscsi/conn.h b/lib/iscsi/conn.h index 53f9b8f34..dd7a10add 100644 --- a/lib/iscsi/conn.h +++ b/lib/iscsi/conn.h @@ -190,7 +190,6 @@ struct spdk_iscsi_conn { char *partial_text_parameter; STAILQ_ENTRY(spdk_iscsi_conn) link; - struct spdk_poller *flush_poller; bool is_stopped; /* Set true when connection is stopped for migration */ TAILQ_HEAD(queued_r2t_tasks, spdk_iscsi_task) queued_r2t_tasks; TAILQ_HEAD(active_r2t_tasks, spdk_iscsi_task) active_r2t_tasks; diff --git a/lib/iscsi/iscsi.h b/lib/iscsi/iscsi.h index 7cf4eb27e..101be4829 100644 --- a/lib/iscsi/iscsi.h +++ b/lib/iscsi/iscsi.h @@ -41,6 +41,7 @@ #include "spdk/iscsi_spec.h" #include "spdk/event.h" #include "spdk/thread.h" +#include "spdk/sock.h" #include "spdk/scsi.h" #include "iscsi/param.h" @@ -161,6 +162,12 @@ struct spdk_mobj { void *buf; }; +/* + * Maximum number of SGL elements, i.e., + * BHS, AHS, Header Digest, Data Segment and Data Digest. + */ +#define SPDK_ISCSI_MAX_SGL_DESCRIPTORS (5) + struct spdk_iscsi_pdu { struct iscsi_bhs bhs; struct spdk_mobj *mobj; @@ -184,6 +191,13 @@ struct spdk_iscsi_pdu { bool dif_insert_or_strip; struct spdk_dif_ctx dif_ctx; struct spdk_iscsi_conn *conn; + + /* The sock request ends with a 0 length iovec. Place the actual iovec immediately + * after it. There is a static assert below to check if the compiler inserted + * any unwanted padding */ + int32_t mapped_length; + struct spdk_sock_request sock_req; + struct iovec iov[SPDK_ISCSI_MAX_SGL_DESCRIPTORS]; TAILQ_ENTRY(spdk_iscsi_pdu) tailq; @@ -199,6 +213,9 @@ struct spdk_iscsi_pdu { uint8_t data[32]; } sense; }; +SPDK_STATIC_ASSERT(offsetof(struct spdk_iscsi_pdu, + sock_req) + sizeof(struct spdk_sock_request) == offsetof(struct spdk_iscsi_pdu, iov), + "Compiler inserted padding between iov and sock_req"); enum iscsi_connection_state { ISCSI_CONN_STATE_INVALID = 0, diff --git a/test/unit/lib/iscsi/conn.c/conn_ut.c b/test/unit/lib/iscsi/conn.c/conn_ut.c index eb83621a8..b076c084d 100644 --- a/test/unit/lib/iscsi/conn.c/conn_ut.c +++ b/test/unit/lib/iscsi/conn.c/conn_ut.c @@ -510,51 +510,6 @@ process_non_read_task_completion_test(void) CU_ASSERT(primary.scsi.ref == 0); } -static void -recursive_flush_pdus_calls(void) -{ - struct spdk_iscsi_pdu pdu1 = {}, pdu2 = {}, pdu3 = {}; - struct spdk_iscsi_task task1 = {}, task2 = {}, task3 = {}; - struct spdk_iscsi_conn conn = {}; - int rc; - - TAILQ_INIT(&conn.write_pdu_list); - conn.data_in_cnt = 3; - - task1.scsi.ref = 1; - task2.scsi.ref = 1; - task3.scsi.ref = 1; - - task1.scsi.offset = 512; - task2.scsi.offset = 512 * 2; - task3.scsi.offset = 512 * 3; - - pdu1.task = &task1; - pdu2.task = &task2; - pdu3.task = &task3; - - pdu1.bhs.opcode = ISCSI_OP_SCSI_DATAIN; - pdu2.bhs.opcode = ISCSI_OP_SCSI_DATAIN; - pdu3.bhs.opcode = ISCSI_OP_SCSI_DATAIN; - - DSET24(&pdu1.bhs.data_segment_len, 512); - DSET24(&pdu2.bhs.data_segment_len, 512); - DSET24(&pdu3.bhs.data_segment_len, 512); - - TAILQ_INSERT_TAIL(&conn.write_pdu_list, &pdu1, tailq); - TAILQ_INSERT_TAIL(&conn.write_pdu_list, &pdu2, tailq); - TAILQ_INSERT_TAIL(&conn.write_pdu_list, &pdu3, tailq); - - g_sock_writev_bytes = (512 + ISCSI_BHS_LEN) * 3; - - rc = iscsi_conn_flush_pdus_internal(&conn); - CU_ASSERT(rc == 0); - - CU_ASSERT(task1.scsi.ref == 0); - CU_ASSERT(task2.scsi.ref == 0); - CU_ASSERT(task3.scsi.ref == 0); -} - static bool dequeue_pdu(void *_head, struct spdk_iscsi_pdu *pdu) { @@ -935,7 +890,6 @@ main(int argc, char **argv) propagate_scsi_error_status_for_split_read_tasks) == NULL || CU_add_test(suite, "process_non_read_task_completion_test", process_non_read_task_completion_test) == NULL || - CU_add_test(suite, "recursive_flush_pdus_calls", recursive_flush_pdus_calls) == NULL || CU_add_test(suite, "free_tasks_on_connection", free_tasks_on_connection) == NULL || CU_add_test(suite, "free_tasks_with_queued_datain", free_tasks_with_queued_datain) == NULL || CU_add_test(suite, "abort_queued_datain_task_test", abort_queued_datain_task_test) == NULL ||