From 199fa6015d774e265ccec87ae99d9e36e62f9045 Mon Sep 17 00:00:00 2001 From: Jim Harris Date: Thu, 6 May 2021 21:20:45 +0000 Subject: [PATCH] util: add zipf random number generator zipf is a power law probability distribution. When applied to performance testing of block devices, it will select blocks over the full range of LBAs, but will more frequently select lower-numbered LBAs. The theta parameter governs the distribution - higher values of theta will concentrate the distribution on a smaller number of LBAs. Note that fio supports zipf, so adding it to SPDK will enable our perf tools (bdevperf, nvme-perf) to provide similar functionality. Signed-off-by: Jim Harris Change-Id: I7df129c9d61996a2070188c6cd9f1fde631ac208 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/7779 Tested-by: SPDK CI Jenkins Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Reviewed-by: Ben Walker Reviewed-by: Shuhei Matsumoto --- include/spdk/stdinc.h | 1 + include/spdk/zipf.h | 83 ++++++++++++++++++++++++ lib/util/Makefile | 4 +- lib/util/spdk_util.map | 5 ++ lib/util/zipf.c | 139 +++++++++++++++++++++++++++++++++++++++++ mk/spdk.common.mk | 1 + 6 files changed, 231 insertions(+), 2 deletions(-) create mode 100644 include/spdk/zipf.h create mode 100644 lib/util/zipf.c diff --git a/include/spdk/stdinc.h b/include/spdk/stdinc.h index 65820d58e..f7501f820 100644 --- a/include/spdk/stdinc.h +++ b/include/spdk/stdinc.h @@ -50,6 +50,7 @@ extern "C" { #include #include #include +#include #include #include #include diff --git a/include/spdk/zipf.h b/include/spdk/zipf.h new file mode 100644 index 000000000..d68c417b7 --- /dev/null +++ b/include/spdk/zipf.h @@ -0,0 +1,83 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Zipf random number distribution + */ + +#ifndef SPDK_ZIPF_H +#define SPDK_ZIPF_H + +#include "spdk/stdinc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct spdk_zipf; + +/** + * Create a zipf random number generator. + * + * Numbers from [0, range) will be returned by the generator when + * calling \ref spdk_zipf_generate. + * + * \param range Range of values for the zipf distribution. + * \param theta Theta distribution parameter. Must not be 1.0 (1 - theta is used as a divisor). 
+ * \param seed Seed value for the random number generator. + * + * \return a pointer to the new zipf generator, or NULL if memory allocation fails. + */ +struct spdk_zipf *spdk_zipf_create(uint64_t range, double theta, uint32_t seed); + +/** + * Free a zipf generator and set the pointer to NULL. + * + * \param zipfp Address of the pointer to the zipf generator to free. Must not be NULL. + */ +void spdk_zipf_free(struct spdk_zipf **zipfp); + +/** + * Generate a value from the zipf generator. + * + * \param zipf Zipf generator to generate the value from. + * + * \return value in the range [0, range) + */ +uint64_t spdk_zipf_generate(struct spdk_zipf *zipf); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/util/Makefile b/lib/util/Makefile index f4eb147c2..f08543de6 100644 --- a/lib/util/Makefile +++ b/lib/util/Makefile @@ -35,11 +35,11 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk SO_VER := 3 -SO_MINOR := 0 +SO_MINOR := 1 C_SRCS = base64.c bit_array.c cpuset.c crc16.c crc32.c crc32c.c crc32_ieee.c \ dif.c fd.c file.c iov.c math.c pipe.c strerror_tls.c string.c uuid.c \ - fd_group.c + fd_group.c zipf.c LIBNAME = util LOCAL_SYS_LIBS = -luuid diff --git a/lib/util/spdk_util.map b/lib/util/spdk_util.map index 31b191af0..f182f423f 100644 --- a/lib/util/spdk_util.map +++ b/lib/util/spdk_util.map @@ -144,5 +144,10 @@ spdk_fd_group_event_modify; spdk_fd_group_get_fd; + # public functions in zipf.h + spdk_zipf_create; + spdk_zipf_free; + spdk_zipf_generate; + local: *; }; diff --git a/lib/util/zipf.c b/lib/util/zipf.c new file mode 100644 index 000000000..e7a1106bd --- /dev/null +++ b/lib/util/zipf.c @@ -0,0 +1,139 @@ +/*- + * BSD LICENSE + * + * Copyright(c) Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "spdk/stdinc.h" +#include "spdk/util.h" +#include "spdk/zipf.h" +/* Generator state. Only seed changes after spdk_zipf_create(); the rest is precomputed there. */ +struct spdk_zipf { + uint64_t range; /* range passed to spdk_zipf_create() */ + double alpha; /* 1 / (1 - theta) */ + double eta; /* scales the uniform sample in spdk_zipf_generate() */ + double theta; /* theta passed to spdk_zipf_create() */ + double zetan; /* zeta(range, theta) */ + double val1_limit; /* 1 + 0.5^theta: randz in [1, val1_limit) yields 1 */ + uint32_t seed; /* rand_r() state */ +}; +/* Return the zeta term for 0-based index n: (1 / (n + 1))^theta. */ +static double +zeta_increment(uint64_t n, double theta) +{ + return pow((double) 1.0 / (n + 1), theta); +} +/* Sum the zeta terms for [0, range): exact for the first 10M terms, estimated beyond that. */ +static double +zeta(uint64_t range, double theta) +{ + double zetan = 0; + double inc1, inc2; + uint64_t i, calc, count; + const uint32_t ZIPF_MAX_ZETA_CALC = 10 * 1000 * 1000; + const uint32_t ZIPF_ZETA_ESTIMATE = 1 * 1000 * 1000; + + /* Cumulate zeta discretely for the first ZIPF_MAX_ZETA_CALC + * entries in the range. + */ + calc = spdk_min(ZIPF_MAX_ZETA_CALC, range); + for (i = 0; i < calc; i++) { + zetan += zeta_increment(i, theta); + } + + /* For the remaining values in the range, increment zetan + * with an approximation for every ZIPF_ZETA_ESTIMATE + * entries. We will take an average of the increment + * for (i) and (i + ZIPF_ZETA_ESTIMATE), and then multiply + * that by ZIPF_ZETA_ESTIMATE. + * + * Of course, we'll cap ZIPF_ZETA_ESTIMATE to something + * smaller if necessary at the end of the range. 
+ */ + while (i < range) { + count = spdk_min(ZIPF_ZETA_ESTIMATE, range - i); + inc1 = zeta_increment(i, theta); + inc2 = zeta_increment(i + count, theta); + zetan += (inc1 + inc2) * count / 2; + i += count; + } + + return zetan; +} +/* Allocate a generator and precompute its constants. Returns NULL if the allocation fails. */ +struct spdk_zipf * +spdk_zipf_create(uint64_t range, double theta, uint32_t seed) +{ + struct spdk_zipf *zipf; + + zipf = calloc(1, sizeof(*zipf)); + if (zipf == NULL) { + return NULL; + } + + zipf->range = range; + zipf->seed = seed; + + zipf->theta = theta; + zipf->alpha = 1.0 / (1.0 - zipf->theta); /* NOTE: divides by zero when theta == 1.0 */ + zipf->zetan = zeta(range, theta); + zipf->eta = (1.0 - pow(2.0 / zipf->range, 1.0 - zipf->theta)) / + (1.0 - zeta(2, theta) / zipf->zetan); + zipf->val1_limit = 1.0 + pow(0.5, zipf->theta); + + return zipf; +} +/* Free *zipfp and set *zipfp to NULL. zipfp itself must not be NULL (asserted). */ +void +spdk_zipf_free(struct spdk_zipf **zipfp) +{ + assert(zipfp != NULL); + free(*zipfp); + *zipfp = NULL; +} +/* Draw the next value: 0 and 1 are chosen by threshold on randz, the rest by the power formula. */ +uint64_t +spdk_zipf_generate(struct spdk_zipf *zipf) +{ + double randu, randz; + uint64_t val; + + randu = (double)rand_r(&zipf->seed) / RAND_MAX; + randz = randu * zipf->zetan; + /* Only 0 is valid when range <= 1; without this check range == 1 yields 1 when randu == 1.0. */ + if (randz < 1.0 || zipf->range <= 1) { + return 0; + } else if (randz < zipf->val1_limit) { + return 1; + } else { + val = zipf->range * pow(zipf->eta * (randu - 1.0) + 1.0, zipf->alpha); + return val % zipf->range; + } +} diff --git a/mk/spdk.common.mk b/mk/spdk.common.mk index e3f7b3a75..d3bf5689b 100644 --- a/mk/spdk.common.mk +++ b/mk/spdk.common.mk @@ -295,6 +295,7 @@ CXXFLAGS += $(COMMON_CFLAGS) SYS_LIBS += -lrt SYS_LIBS += -luuid SYS_LIBS += -lcrypto +SYS_LIBS += -lm ifneq ($(CONFIG_NVME_CUSE)$(CONFIG_FUSE),nn) SYS_LIBS += -lfuse3