nvme: remove avx optimizations when copying command
Using AVX512 or AVX2 ends up being a small pessimization. I think AVX works better for copies when there are multiple cache lines to copy. I see a 2-3% improvement in high-IOPS benchmarks when reverting to SSE.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I3d70a1e359e98cec2a9da41ccf9af2de9baa5868
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/456247
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Paul Luse <paul.e.luse@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
This commit is contained in:
parent
c85164bd69
commit
d09874f3a2
@ -1095,18 +1095,7 @@ static inline void
|
|||||||
nvme_pcie_copy_command(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *src)
|
nvme_pcie_copy_command(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *src)
|
||||||
{
|
{
|
||||||
/* dst and src are known to be non-overlapping and 64-byte aligned. */
|
/* dst and src are known to be non-overlapping and 64-byte aligned. */
|
||||||
#if defined(__AVX512F__)
|
#if defined(__SSE2__)
|
||||||
__m512i *d512 = (__m512i *)dst;
|
|
||||||
const __m512i *s512 = (const __m512i *)src;
|
|
||||||
|
|
||||||
_mm512_stream_si512(d512, _mm512_load_si512(s512));
|
|
||||||
#elif defined(__AVX__)
|
|
||||||
__m256i *d256 = (__m256i *)dst;
|
|
||||||
const __m256i *s256 = (const __m256i *)src;
|
|
||||||
|
|
||||||
_mm256_stream_si256(&d256[0], _mm256_load_si256(&s256[0]));
|
|
||||||
_mm256_stream_si256(&d256[1], _mm256_load_si256(&s256[1]));
|
|
||||||
#elif defined(__SSE2__)
|
|
||||||
__m128i *d128 = (__m128i *)dst;
|
__m128i *d128 = (__m128i *)dst;
|
||||||
const __m128i *s128 = (const __m128i *)src;
|
const __m128i *s128 = (const __m128i *)src;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user