From fed358a0e7bfaccecd3e785d834297809952bae5 Mon Sep 17 00:00:00 2001 From: Richael Zhuang Date: Wed, 15 Feb 2023 12:22:06 +0800 Subject: [PATCH] util: fix misaligned load for uint64_t type The following error was reported when running gpt_ut which is related to crc32_update(). "load of misaligned address 0x001ffeff78cc for type 'const uint64_t', which requires 8 byte alignment". This patch preprocesses the first several bytes to make the buf address passed to __crc32_d or__crc32_cd is 8 byte aligned. And finally process the trailing bytes. For function spdk_crc32c_update in crc32c.c, memcpy was used to avoid misaligned load problem. Update it with above solution to reduce extra overhead. Signed-off-by: Richael Zhuang Change-Id: I7c7aaa41e1c042a96668158818b06729fb3ceec6 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/16801 Tested-by: SPDK CI Jenkins Reviewed-by: Ben Walker Reviewed-by: Jim Harris --- lib/util/crc32.c | 23 ++++++++++++----- lib/util/crc32c.c | 66 ++++++++++++++++++++++++++++------------------- 2 files changed, 57 insertions(+), 32 deletions(-) diff --git a/lib/util/crc32.c b/lib/util/crc32.c index 9a3e946d3..7b444d8fb 100644 --- a/lib/util/crc32.c +++ b/lib/util/crc32.c @@ -31,22 +31,33 @@ crc32_table_init(struct spdk_crc32_table *table, uint32_t polynomial_reflect) uint32_t crc32_update(const struct spdk_crc32_table *table, const void *buf, size_t len, uint32_t crc) { - size_t count; + size_t count_pre, count_post, count_mid; const uint64_t *dword_buf; - count = len & 7; - while (count--) { + /* process the head and tail bytes seperately to make the buf address + * passed to crc32_d is 8 byte aligned. This can avoid unaligned loads. + */ + count_pre = ((uint64_t)buf & 7) == 0 ? 0 : 8 - ((uint64_t)buf & 7); + count_post = (uint64_t)(buf + len) & 7; + count_mid = (len - count_pre - count_post) / 8; + + while (count_pre--) { crc = __crc32b(crc, *(const uint8_t *)buf); buf++; } - dword_buf = (const uint64_t *)buf; - count = len / 8; - while (count--) { + dword_buf = (const uint64_t *)buf; + while (count_mid--) { crc = __crc32d(crc, *dword_buf); dword_buf++; } + buf = dword_buf; + while (count_post--) { + crc = __crc32b(crc, *(const uint8_t *)buf); + buf++; + } + return crc; } diff --git a/lib/util/crc32c.c b/lib/util/crc32c.c index 7b49498de..87d77c505 100644 --- a/lib/util/crc32c.c +++ b/lib/util/crc32c.c @@ -20,30 +20,34 @@ spdk_crc32c_update(const void *buf, size_t len, uint32_t crc) uint32_t spdk_crc32c_update(const void *buf, size_t len, uint32_t crc) { + size_t count_pre, count_post, count_mid; + const uint64_t *dword_buf; uint64_t crc_tmp64; - size_t count; + + /* process the head and tail bytes seperately to make the buf address + * passed to _mm_crc32_u64 is 8 byte aligned. This can avoid unaligned loads. + */ + count_pre = ((uint64_t)buf & 7) == 0 ? 0 : 8 - ((uint64_t)buf & 7); + count_post = (uint64_t)(buf + len) & 7; + count_mid = (len - count_pre - count_post) / 8; + + while (count_pre--) { + crc = _mm_crc32_u8(crc, *(const uint8_t *)buf); + buf++; + } /* _mm_crc32_u64() needs a 64-bit intermediate value */ crc_tmp64 = crc; + dword_buf = (const uint64_t *)buf; - /* Process as much of the buffer as possible in 64-bit blocks. */ - count = len / 8; - while (count--) { - uint64_t block; - - /* - * Use memcpy() to avoid unaligned loads, which are undefined behavior in C. - * The compiler will optimize out the memcpy() in release builds. - */ - memcpy(&block, buf, sizeof(block)); - crc_tmp64 = _mm_crc32_u64(crc_tmp64, block); - buf += sizeof(block); + while (count_mid--) { + crc_tmp64 = _mm_crc32_u64(crc_tmp64, *dword_buf); + dword_buf++; } - crc = (uint32_t)crc_tmp64; - /* Handle any trailing bytes. */ - count = len & 7; - while (count--) { + buf = dword_buf; + crc = (uint32_t)crc_tmp64; + while (count_post--) { crc = _mm_crc32_u8(crc, *(const uint8_t *)buf); buf++; } @@ -56,19 +60,29 @@ spdk_crc32c_update(const void *buf, size_t len, uint32_t crc) uint32_t spdk_crc32c_update(const void *buf, size_t len, uint32_t crc) { - size_t count; + size_t count_pre, count_post, count_mid; + const uint64_t *dword_buf; - count = len / 8; - while (count--) { - uint64_t block; + /* process the head and tail bytes seperately to make the buf address + * passed to crc32_cd is 8 byte aligned. This can avoid unaligned loads. + */ + count_pre = ((uint64_t)buf & 7) == 0 ? 0 : 8 - ((uint64_t)buf & 7); + count_post = (uint64_t)(buf + len) & 7; + count_mid = (len - count_pre - count_post) / 8; - memcpy(&block, buf, sizeof(block)); - crc = __crc32cd(crc, block); - buf += sizeof(block); + while (count_pre--) { + crc = __crc32cb(crc, *(const uint8_t *)buf); + buf++; } - count = len & 7; - while (count--) { + dword_buf = (const uint64_t *)buf; + while (count_mid--) { + crc = __crc32cd(crc, *dword_buf); + dword_buf++; + } + + buf = dword_buf; + while (count_post--) { crc = __crc32cb(crc, *(const uint8_t *)buf); buf++; }