util: fix misaligned load for uint64_t type
The following error was reported when running gpt_ut which is related to crc32_update(). "load of misaligned address 0x001ffeff78cc for type 'const uint64_t', which requires 8 byte alignment". This patch preprocesses the first several bytes to make the buf address passed to __crc32_d or__crc32_cd is 8 byte aligned. And finally process the trailing bytes. For function spdk_crc32c_update in crc32c.c, memcpy was used to avoid misaligned load problem. Update it with above solution to reduce extra overhead. Signed-off-by: Richael Zhuang <richael.zhuang@arm.com> Change-Id: I7c7aaa41e1c042a96668158818b06729fb3ceec6 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/16801 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
parent
2d590e74b4
commit
fed358a0e7
@ -31,22 +31,33 @@ crc32_table_init(struct spdk_crc32_table *table, uint32_t polynomial_reflect)
|
||||
uint32_t
|
||||
crc32_update(const struct spdk_crc32_table *table, const void *buf, size_t len, uint32_t crc)
|
||||
{
|
||||
size_t count;
|
||||
size_t count_pre, count_post, count_mid;
|
||||
const uint64_t *dword_buf;
|
||||
|
||||
count = len & 7;
|
||||
while (count--) {
|
||||
/* process the head and tail bytes seperately to make the buf address
|
||||
* passed to crc32_d is 8 byte aligned. This can avoid unaligned loads.
|
||||
*/
|
||||
count_pre = ((uint64_t)buf & 7) == 0 ? 0 : 8 - ((uint64_t)buf & 7);
|
||||
count_post = (uint64_t)(buf + len) & 7;
|
||||
count_mid = (len - count_pre - count_post) / 8;
|
||||
|
||||
while (count_pre--) {
|
||||
crc = __crc32b(crc, *(const uint8_t *)buf);
|
||||
buf++;
|
||||
}
|
||||
dword_buf = (const uint64_t *)buf;
|
||||
|
||||
count = len / 8;
|
||||
while (count--) {
|
||||
dword_buf = (const uint64_t *)buf;
|
||||
while (count_mid--) {
|
||||
crc = __crc32d(crc, *dword_buf);
|
||||
dword_buf++;
|
||||
}
|
||||
|
||||
buf = dword_buf;
|
||||
while (count_post--) {
|
||||
crc = __crc32b(crc, *(const uint8_t *)buf);
|
||||
buf++;
|
||||
}
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
||||
|
@ -20,30 +20,34 @@ spdk_crc32c_update(const void *buf, size_t len, uint32_t crc)
|
||||
uint32_t
|
||||
spdk_crc32c_update(const void *buf, size_t len, uint32_t crc)
|
||||
{
|
||||
size_t count_pre, count_post, count_mid;
|
||||
const uint64_t *dword_buf;
|
||||
uint64_t crc_tmp64;
|
||||
size_t count;
|
||||
|
||||
/* process the head and tail bytes seperately to make the buf address
|
||||
* passed to _mm_crc32_u64 is 8 byte aligned. This can avoid unaligned loads.
|
||||
*/
|
||||
count_pre = ((uint64_t)buf & 7) == 0 ? 0 : 8 - ((uint64_t)buf & 7);
|
||||
count_post = (uint64_t)(buf + len) & 7;
|
||||
count_mid = (len - count_pre - count_post) / 8;
|
||||
|
||||
while (count_pre--) {
|
||||
crc = _mm_crc32_u8(crc, *(const uint8_t *)buf);
|
||||
buf++;
|
||||
}
|
||||
|
||||
/* _mm_crc32_u64() needs a 64-bit intermediate value */
|
||||
crc_tmp64 = crc;
|
||||
dword_buf = (const uint64_t *)buf;
|
||||
|
||||
/* Process as much of the buffer as possible in 64-bit blocks. */
|
||||
count = len / 8;
|
||||
while (count--) {
|
||||
uint64_t block;
|
||||
|
||||
/*
|
||||
* Use memcpy() to avoid unaligned loads, which are undefined behavior in C.
|
||||
* The compiler will optimize out the memcpy() in release builds.
|
||||
*/
|
||||
memcpy(&block, buf, sizeof(block));
|
||||
crc_tmp64 = _mm_crc32_u64(crc_tmp64, block);
|
||||
buf += sizeof(block);
|
||||
while (count_mid--) {
|
||||
crc_tmp64 = _mm_crc32_u64(crc_tmp64, *dword_buf);
|
||||
dword_buf++;
|
||||
}
|
||||
crc = (uint32_t)crc_tmp64;
|
||||
|
||||
/* Handle any trailing bytes. */
|
||||
count = len & 7;
|
||||
while (count--) {
|
||||
buf = dword_buf;
|
||||
crc = (uint32_t)crc_tmp64;
|
||||
while (count_post--) {
|
||||
crc = _mm_crc32_u8(crc, *(const uint8_t *)buf);
|
||||
buf++;
|
||||
}
|
||||
@ -56,19 +60,29 @@ spdk_crc32c_update(const void *buf, size_t len, uint32_t crc)
|
||||
uint32_t
|
||||
spdk_crc32c_update(const void *buf, size_t len, uint32_t crc)
|
||||
{
|
||||
size_t count;
|
||||
size_t count_pre, count_post, count_mid;
|
||||
const uint64_t *dword_buf;
|
||||
|
||||
count = len / 8;
|
||||
while (count--) {
|
||||
uint64_t block;
|
||||
/* process the head and tail bytes seperately to make the buf address
|
||||
* passed to crc32_cd is 8 byte aligned. This can avoid unaligned loads.
|
||||
*/
|
||||
count_pre = ((uint64_t)buf & 7) == 0 ? 0 : 8 - ((uint64_t)buf & 7);
|
||||
count_post = (uint64_t)(buf + len) & 7;
|
||||
count_mid = (len - count_pre - count_post) / 8;
|
||||
|
||||
memcpy(&block, buf, sizeof(block));
|
||||
crc = __crc32cd(crc, block);
|
||||
buf += sizeof(block);
|
||||
while (count_pre--) {
|
||||
crc = __crc32cb(crc, *(const uint8_t *)buf);
|
||||
buf++;
|
||||
}
|
||||
|
||||
count = len & 7;
|
||||
while (count--) {
|
||||
dword_buf = (const uint64_t *)buf;
|
||||
while (count_mid--) {
|
||||
crc = __crc32cd(crc, *dword_buf);
|
||||
dword_buf++;
|
||||
}
|
||||
|
||||
buf = dword_buf;
|
||||
while (count_post--) {
|
||||
crc = __crc32cb(crc, *(const uint8_t *)buf);
|
||||
buf++;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user