lib/reduce: eliminate memcpy in read decompression path

This is the first in a series of patches to eliminate memcpy operations
in the compression/decompression paths. Currently the library uses two
scratch buffers and copies all data between them and the user buffers
after every compression or decompression. This patch removes the memcpy
in one of the paths by constructing an iovec array that points to a
combination of the scratch buffer and the user buffer, so that user data
decompresses directly into the user buffer and any data in the chunk
that isn't needed by the user is sent to the scratch buffer.

Signed-off-by: paul luse <paul.e.luse@intel.com>
Change-Id: Ib1956875729a82d218527bc81795f750d1df2b89
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/459662
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
This commit is contained in:
paul luse 2019-06-27 12:32:26 -04:00 committed by Darek Stojaczyk
parent f499ef7191
commit 89a9a50497

View File

@@ -87,6 +87,8 @@ struct spdk_reduce_chunk_map {
 	uint64_t	io_unit_index[0];
 };

+#define REDUCE_MAX_IOVECS	32
+
 struct spdk_reduce_vol_request {
 	/**
 	 * Scratch buffer used for uncompressed chunk. This is used for:
@@ -97,6 +99,15 @@ struct spdk_reduce_vol_request {
 	 */
 	uint8_t		*decomp_buf;
 	struct iovec	*decomp_buf_iov;
+
+	/**
+	 * These are used to construct the iovecs that are sent to
+	 * the decomp engine, they point to a mix of the scratch buffer
+	 * and user buffer
+	 */
+	struct iovec	decomp_iov[REDUCE_MAX_IOVECS];
+	int		decomp_iovcnt;
+
 	/**
 	 * Scratch buffer used for compressed chunk. This is used for:
 	 * 1) destination buffer for compression operations
@@ -1058,15 +1069,45 @@ static void
 _reduce_vol_decompress_chunk(struct spdk_reduce_vol_request *req, reduce_request_fn next_fn)
 {
 	struct spdk_reduce_vol *vol = req->vol;
+	uint64_t chunk_offset, remainder = 0;
+	uint64_t ttl_len = 0;
+	int i;
+
+	req->decomp_iovcnt = 0;
+	chunk_offset = req->offset % vol->logical_blocks_per_chunk;
+
+	if (chunk_offset) {
+		/* first iov point to our scratch buffer for any offset into the chunk */
+		req->decomp_iov[0].iov_base = req->decomp_buf;
+		req->decomp_iov[0].iov_len = chunk_offset * vol->params.logical_block_size;
+		ttl_len += req->decomp_iov[0].iov_len;
+		req->decomp_iovcnt = 1;
+	}
+
+	/* now the user data iov, direct to the user buffer */
+	for (i = 0; i < req->iovcnt; i++) {
+		req->decomp_iov[i + req->decomp_iovcnt].iov_base = req->iov[i].iov_base;
+		req->decomp_iov[i + req->decomp_iovcnt].iov_len = req->iov[i].iov_len;
+		ttl_len += req->decomp_iov[i + req->decomp_iovcnt].iov_len;
+		req->decomp_iovcnt++;
+	}
+
+	/* send the rest of the chunk to our scratch buffer */
+	remainder = vol->params.chunk_size - ttl_len;
+	if (remainder) {
+		req->decomp_iov[req->decomp_iovcnt].iov_base = req->decomp_buf + ttl_len;
+		req->decomp_iov[req->decomp_iovcnt].iov_len = remainder;
+		ttl_len += req->decomp_iov[req->decomp_iovcnt].iov_len;
+		req->decomp_iovcnt++;
+	}
+	assert(ttl_len == vol->params.chunk_size);
+
 	req->backing_cb_args.cb_fn = next_fn;
 	req->backing_cb_args.cb_arg = req;
 	req->comp_buf_iov[0].iov_base = req->comp_buf;
 	req->comp_buf_iov[0].iov_len = req->chunk->compressed_size;
-	req->decomp_buf_iov[0].iov_base = req->decomp_buf;
-	req->decomp_buf_iov[0].iov_len = vol->params.chunk_size;
 	vol->backing_dev->decompress(vol->backing_dev,
-				     req->comp_buf_iov, 1, req->decomp_buf_iov, 1,
+				     req->comp_buf_iov, 1, &req->decomp_iov[0], req->decomp_iovcnt,
 				     &req->backing_cb_args);
 }
@@ -1135,9 +1176,6 @@ _read_decompress_done(void *_req, int reduce_errno)
 {
 	struct spdk_reduce_vol_request *req = _req;
 	struct spdk_reduce_vol *vol = req->vol;
-	uint64_t chunk_offset;
-	uint8_t *buf;
-	int i;

 	/* Negative reduce_errno indicates failure for compression operations. */
 	if (reduce_errno < 0) {
@@ -1154,12 +1192,6 @@ _read_decompress_done(void *_req, int reduce_errno)
 		return;
 	}

-	chunk_offset = req->offset % vol->logical_blocks_per_chunk;
-	buf = req->decomp_buf + chunk_offset * vol->params.logical_block_size;
-	for (i = 0; i < req->iovcnt; i++) {
-		memcpy(req->iov[i].iov_base, buf, req->iov[i].iov_len);
-		buf += req->iov[i].iov_len;
-	}
 	_reduce_vol_complete_req(req, 0);
 }