From d09874f3a200fad6c883f9ee5bda08157983a569 Mon Sep 17 00:00:00 2001
From: Jim Harris
Date: Wed, 29 May 2019 02:34:28 -0700
Subject: [PATCH] nvme: remove avx optimizations when copying command

Using AVX512 or AVX2 ends up being a small pessimization.
I think AVX works better for copies when there are multiple
cachelines to copy. I see a 2-3% improvement in high
IOPS benchmarks when reverting to SSE.

Signed-off-by: Jim Harris
Change-Id: I3d70a1e359e98cec2a9da41ccf9af2de9baa5868
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/456247
Reviewed-by: Ben Walker
Reviewed-by: Shuhei Matsumoto
Reviewed-by: Paul Luse
Tested-by: SPDK CI Jenkins
---
 lib/nvme/nvme_pcie.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/lib/nvme/nvme_pcie.c b/lib/nvme/nvme_pcie.c
index cb3c6fba2..976e4b73c 100644
--- a/lib/nvme/nvme_pcie.c
+++ b/lib/nvme/nvme_pcie.c
@@ -1095,18 +1095,7 @@ static inline void
 nvme_pcie_copy_command(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *src)
 {
 	/* dst and src are known to be non-overlapping and 64-byte aligned. */
-#if defined(__AVX512F__)
-	__m512i *d512 = (__m512i *)dst;
-	const __m512i *s512 = (const __m512i *)src;
-
-	_mm512_stream_si512(d512, _mm512_load_si512(s512));
-#elif defined(__AVX__)
-	__m256i *d256 = (__m256i *)dst;
-	const __m256i *s256 = (const __m256i *)src;
-
-	_mm256_stream_si256(&d256[0], _mm256_load_si256(&s256[0]));
-	_mm256_stream_si256(&d256[1], _mm256_load_si256(&s256[1]));
-#elif defined(__SSE2__)
+#if defined(__SSE2__)
 	__m128i *d128 = (__m128i *)dst;
 	const __m128i *s128 = (const __m128i *)src;
 