From 1edd9bf3e467eb7a9591aee2216eccdfb8cb4dfa Mon Sep 17 00:00:00 2001 From: Jim Harris Date: Wed, 22 Mar 2017 13:35:00 -0700 Subject: [PATCH] blobfs: Add a lightweight filesystem built on the blobstore This is the initial commit for "blobfs", a lightweight filesystem built on top of the SPDK blobstore. Also included in this patch: 1) a shim for using SPDK bdevs as the backing store for SPDK blobstore/blobfs 2) documentation for using blobfs as the storage engine with RocksDB 3) scripts for running a set of workloads and collecting profiling data with RocksDB and blobfs See doc/blobfs/getting_started.md included in this commit for more details on blobfs, including some of the current limitations. Signed-off-by: Jim Harris Change-Id: I2a6d3d4b87236730051228ed62c0c04e04c42c73 --- autotest.sh | 5 + doc/Doxyfile | 2 + doc/blobfs/getting_started.md | 62 + doc/blobfs/index.md | 3 + doc/index.md | 1 + etc/spdk/rocksdb.conf.in | 37 + include/spdk/blob_bdev.h | 54 + include/spdk/blobfs.h | 135 ++ lib/Makefile | 2 +- lib/blob/Makefile | 2 + lib/blob/bdev/Makefile | 40 + lib/blob/bdev/blob_bdev.c | 167 ++ lib/blobfs/Makefile | 40 + lib/blobfs/blobfs.c | 2209 +++++++++++++++++++++++++ lib/blobfs/blobfs_internal.h | 108 ++ lib/blobfs/tree.c | 182 ++ test/blobfs/rocksdb/.gitignore | 1 + test/blobfs/rocksdb/common_flags.txt | 27 + test/blobfs/rocksdb/postprocess.py | 70 + test/blobfs/rocksdb/rocksdb.sh | 40 + test/blobfs/rocksdb/run_tests.sh | 185 +++ test/lib/Makefile | 2 +- test/lib/blobfs/Makefile | 49 + test/lib/blobfs/blobfs_ut/.gitignore | 1 + test/lib/blobfs/blobfs_ut/Makefile | 56 + test/lib/blobfs/blobfs_ut/blobfs_ut.c | 398 +++++ test/lib/blobfs/cache_ut/.gitignore | 1 + test/lib/blobfs/cache_ut/Makefile | 56 + test/lib/blobfs/cache_ut/cache_ut.c | 282 ++++ test/lib/blobfs/fuse/.gitignore | 1 + test/lib/blobfs/fuse/Makefile | 60 + test/lib/blobfs/fuse/fuse.c | 356 ++++ test/lib/blobfs/mkfs/.gitignore | 1 + test/lib/blobfs/mkfs/Makefile | 58 + test/lib/blobfs/mkfs/mkfs.c 
| 124 ++ unittest.sh | 3 + 36 files changed, 4818 insertions(+), 2 deletions(-) create mode 100644 doc/blobfs/getting_started.md create mode 100644 doc/blobfs/index.md create mode 100644 etc/spdk/rocksdb.conf.in create mode 100644 include/spdk/blob_bdev.h create mode 100644 include/spdk/blobfs.h create mode 100644 lib/blob/bdev/Makefile create mode 100644 lib/blob/bdev/blob_bdev.c create mode 100644 lib/blobfs/Makefile create mode 100644 lib/blobfs/blobfs.c create mode 100644 lib/blobfs/blobfs_internal.h create mode 100644 lib/blobfs/tree.c create mode 100644 test/blobfs/rocksdb/.gitignore create mode 100644 test/blobfs/rocksdb/common_flags.txt create mode 100755 test/blobfs/rocksdb/postprocess.py create mode 100755 test/blobfs/rocksdb/rocksdb.sh create mode 100755 test/blobfs/rocksdb/run_tests.sh create mode 100644 test/lib/blobfs/Makefile create mode 100644 test/lib/blobfs/blobfs_ut/.gitignore create mode 100644 test/lib/blobfs/blobfs_ut/Makefile create mode 100644 test/lib/blobfs/blobfs_ut/blobfs_ut.c create mode 100644 test/lib/blobfs/cache_ut/.gitignore create mode 100644 test/lib/blobfs/cache_ut/Makefile create mode 100644 test/lib/blobfs/cache_ut/cache_ut.c create mode 100644 test/lib/blobfs/fuse/.gitignore create mode 100644 test/lib/blobfs/fuse/Makefile create mode 100644 test/lib/blobfs/fuse/fuse.c create mode 100644 test/lib/blobfs/mkfs/.gitignore create mode 100644 test/lib/blobfs/mkfs/Makefile create mode 100644 test/lib/blobfs/mkfs/mkfs.c diff --git a/autotest.sh b/autotest.sh index 9a67987c9..4e839720e 100755 --- a/autotest.sh +++ b/autotest.sh @@ -14,6 +14,7 @@ fi : ${SPDK_TEST_BLOCKDEV=1}; export SPDK_TEST_BLOCKDEV : ${SPDK_TEST_IOAT=1}; export SPDK_TEST_IOAT : ${SPDK_TEST_EVENT=1}; export SPDK_TEST_EVENT +: ${SPDK_TEST_BLOBFS=1}; export SPDK_TEST_BLOBFS rootdir=$(readlink -f $(dirname $0)) @@ -141,6 +142,10 @@ if [ $(uname -s) = Linux ] && [ $SPDK_TEST_ISCSI -eq 1 ]; then run_test test/lib/iscsi/iscsi.sh fi +if [ $SPDK_TEST_BLOBFS -eq 1 ]; then + 
run_test ./test/blobfs/rocksdb/rocksdb.sh +fi + if [ $SPDK_TEST_NVMF -eq 1 ]; then timing_enter nvmf diff --git a/doc/Doxyfile b/doc/Doxyfile index dbf05753f..9ba9aa281 100644 --- a/doc/Doxyfile +++ b/doc/Doxyfile @@ -764,6 +764,8 @@ INPUT = ../include/spdk \ porting.md \ bdev/index.md \ bdev/getting_started.md \ + blobfs/index.md \ + blobfs/getting_started.md \ event/index.md \ ioat/index.md \ iscsi/index.md \ diff --git a/doc/blobfs/getting_started.md b/doc/blobfs/getting_started.md new file mode 100644 index 000000000..a8f572be9 --- /dev/null +++ b/doc/blobfs/getting_started.md @@ -0,0 +1,62 @@ +# BlobFS Getting Started Guide {#blobfs_getting_started} + +# RocksDB Integration {#blobfs_rocksdb} + +1. Build SPDK as normal. + +2. Clone the RocksDB git repo from the SPDK github projects. Make sure you check out the spdk branch. + + git clone -b spdk https://github.com/spdk/rocksdb.git + +3. Build RocksDB. Note that currently only the db_bench benchmarking tool is integrated with BlobFS. + (Note: add "DEBUG_LEVEL=0" for a release build.) + + make db_bench DPDK_DIR=path/to/dpdk/x86_64-native-linuxapp-gcc SPDK_DIR=path/to/spdk + +4. Copy etc/spdk/rocksdb.conf.in to /usr/local/etc/spdk/rocksdb.conf. + +5. Append an NVMe section to the configuration file. + + scripts/gen_nvme.sh >> /usr/local/etc/spdk/rocksdb.conf + +6. Verify the configuration file has specified the correct NVMe SSD. If there are any NVMe SSDs you do not wish to use for RocksDB/SPDK testing, remove them from the configuration file. +7. Make sure you have at least 5GB of memory allocated for huge pages. By default the SPDK setup.sh script only allocates 2GB (1024 huge pages). The following will allocate 5GB worth of 2MB huge pages (in addition to binding the NVMe devices to uio/vfio). If using 1GB huge pages, adjust the NRHUGE value accordingly. + + NRHUGE=2560 scripts/setup.sh + +8. Create an empty SPDK blobfs for testing. 
+ + test/lib/blobfs/mkfs/mkfs /usr/local/etc/spdk/rocksdb.conf Nvme0n1 + +At this point, RocksDB is ready for testing with SPDK. Three db_bench parameters are used to configure SPDK: + +1. spdk - Defines the name of the SPDK configuration file. If omitted, RocksDB will use the default PosixEnv implementation + instead of SpdkEnv. (Required) +2. spdk_bdev - Defines the name of the SPDK block device which contains the BlobFS to be used for testing. (Required) +3. spdk_cache_size - Defines the amount of userspace cache memory used by SPDK. Specified in terms of megabytes (MB). + Default is 4096 (4GB). (Optional) + +SPDK has a set of scripts which will run db_bench against a variety of workloads and capture performance and profiling +data. The primary script is `test/blobfs/rocksdb/run_tests.sh`. + +# FUSE + +BlobFS provides a FUSE plug-in to mount an SPDK BlobFS as a kernel filesystem for inspection or debug purposes. +The FUSE plug-in requires fuse3 and will be built automatically when fuse3 is detected on the system. + + test/lib/blobfs/fuse/fuse /usr/local/etc/spdk/rocksdb.conf Nvme0n1 /mnt/fuse + +Note that the FUSE plug-in has some limitations - see the list below. + +# Limitations + +* BlobFS has primarily been tested with RocksDB so far, so any use cases different from how RocksDB uses a filesystem + may run into issues. BlobFS will be tested in a broader range of use cases after this initial release. +* Only a synchronous API is currently supported. An asynchronous API has been developed but not thoroughly tested + yet so is not part of the public interface yet. This will be added in a future release. +* File renames are not atomic. This will be fixed in a future release. +* BlobFS currently supports only a flat namespace for files with no directory support. Filenames are currently stored + as xattrs in each blob. This means that filename lookup is an O(n) operation. 
An SPDK btree implementation is + underway which will be the underpinning for BlobFS directory support in a future release. +* Writes to a file must always append to the end of the file. Support for writes to any location within the file + will be added in a future release. diff --git a/doc/blobfs/index.md b/doc/blobfs/index.md new file mode 100644 index 000000000..27b4c4ebb --- /dev/null +++ b/doc/blobfs/index.md @@ -0,0 +1,3 @@ +# BlobFS (Blobstore Filesystem) {#blobfs} + +- @ref blobfs_getting_started diff --git a/doc/index.md b/doc/index.md index 06f1b16f1..76df9e6ff 100644 --- a/doc/index.md +++ b/doc/index.md @@ -25,3 +25,4 @@ which avoids kernel context switches and eliminates interrupt handling overhead. - @ref ioat - @ref iscsi - @ref bdev +- @ref blobfs diff --git a/etc/spdk/rocksdb.conf.in b/etc/spdk/rocksdb.conf.in new file mode 100644 index 000000000..03288f2fa --- /dev/null +++ b/etc/spdk/rocksdb.conf.in @@ -0,0 +1,37 @@ +# spdk configuration file +# +# Please write all parameters using ASCII. +# The parameter must be quoted if it includes whitespace. +# +# Configuration syntax: +# Spaces at head of line are deleted, other spaces are as separator +# Lines starting with '#' are comments and not evaluated. +# Lines ending with '\' are concatenated with the next line. +# Bracketed keys are section keys grouping the following value keys. +# Number of section key is used as a tag number. +# Ex. [TargetNode1] = TargetNode section key with tag number 1 +[Global] + # Users can restrict work items to only run on certain cores by + # specifying a WorkerMask. Default is to allow work items to run + # on all cores. + #WorkerMask 0xFFFF + + # Event mask for ids history buffers + # Default: 0x0 (all events disabled) + # Set to 0xFFFFFFFFFFFFFFFF to enable all events. + #EventMask 0x0 + + # syslog facility + LogFacility "local7" + +[Rpc] + # Defines whether spdk will enable configuration via RPC. + # Default is disabled. 
Note that the RPC interface is not + # authenticated, so users should be careful about enabling + # RPC in non-trusted environments. + #Enable No + +[Ioat] + Disable Yes + +# [Nvme] section will get appended here by scripts/gen_nvme.sh. diff --git a/include/spdk/blob_bdev.h b/include/spdk/blob_bdev.h new file mode 100644 index 000000000..86fe54bed --- /dev/null +++ b/include/spdk/blob_bdev.h @@ -0,0 +1,54 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * Helper library to use spdk_bdev as the backing device for a blobstore + */ + +#ifndef SPDK_BLOB_BDEV_H +#define SPDK_BLOB_BDEV_H + +#ifdef __cplusplus +extern "C" { +#endif + +struct spdk_bs_dev; +struct spdk_bdev; + +struct spdk_bs_dev *spdk_bdev_create_bs_dev(struct spdk_bdev *); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/spdk/blobfs.h b/include/spdk/blobfs.h new file mode 100644 index 000000000..863e829d6 --- /dev/null +++ b/include/spdk/blobfs.h @@ -0,0 +1,135 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * SPDK Filesystem + */ + +#ifndef SPDK_FS_H +#define SPDK_FS_H + +#include +#include +#include + +#include "spdk/blob.h" + +#define SPDK_FILE_NAME_MAX 255 + +struct spdk_file; +struct spdk_filesystem; + +typedef struct spdk_file *spdk_fs_iter; + +struct spdk_file_stat { + spdk_blob_id blobid; + uint64_t size; +}; + +typedef void (*spdk_fs_op_with_handle_complete)(void *ctx, struct spdk_filesystem *fs, + int fserrno); +typedef void (*spdk_file_op_with_handle_complete)(void *ctx, struct spdk_file *f, int fserrno); +typedef spdk_bs_op_complete spdk_fs_op_complete; + +typedef void (*spdk_file_op_complete)(void *ctx, int fserrno); +typedef void (*spdk_file_stat_op_complete)(void *ctx, struct spdk_file_stat *stat, int fserrno); + +typedef void (*fs_request_fn)(void *); +typedef void (*fs_send_request_fn)(fs_request_fn, void *); + +void spdk_fs_init(struct spdk_bs_dev *dev, fs_send_request_fn send_request_fn, + spdk_fs_op_with_handle_complete cb_fn, void *cb_arg); +void spdk_fs_load(struct spdk_bs_dev *dev, fs_send_request_fn send_request_fn, + spdk_fs_op_with_handle_complete cb_fn, void *cb_arg); +void spdk_fs_unload(struct spdk_filesystem *fs, spdk_fs_op_complete 
cb_fn, void *cb_arg); + +struct spdk_io_channel *spdk_fs_alloc_io_channel(struct spdk_filesystem *fs, uint32_t priority); + +/* + * Allocates an I/O channel suitable for using the synchronous blobfs API. These channels do + * not allocate an I/O channel for the underlying blobstore, but rather allocate synchronization + * primitives used to block until any necessary I/O operations are completed on a separate + * polling thread. + */ +struct spdk_io_channel *spdk_fs_alloc_io_channel_sync(struct spdk_filesystem *fs, + uint32_t priority); + +void spdk_fs_free_io_channel(struct spdk_io_channel *channel); + +int spdk_fs_file_stat(struct spdk_filesystem *fs, struct spdk_io_channel *channel, + const char *name, struct spdk_file_stat *stat); + +#define SPDK_BLOBFS_OPEN_CREATE (1ULL << 0) + +int spdk_fs_create_file(struct spdk_filesystem *fs, struct spdk_io_channel *channel, + const char *name); + +int spdk_fs_open_file(struct spdk_filesystem *fs, struct spdk_io_channel *channel, + const char *name, uint32_t flags, struct spdk_file **file); + +int spdk_file_close(struct spdk_file *file, struct spdk_io_channel *channel); + +int spdk_fs_rename_file(struct spdk_filesystem *fs, struct spdk_io_channel *channel, + const char *old_name, const char *new_name); + +int spdk_fs_delete_file(struct spdk_filesystem *fs, struct spdk_io_channel *channel, + const char *name); + +spdk_fs_iter spdk_fs_iter_first(struct spdk_filesystem *fs); +spdk_fs_iter spdk_fs_iter_next(spdk_fs_iter iter); +#define spdk_fs_iter_get_file(iter) ((struct spdk_file *)(iter)) + +void spdk_file_truncate(struct spdk_file *file, struct spdk_io_channel *channel, + uint64_t length); + +const char *spdk_file_get_name(struct spdk_file *file); + +uint64_t spdk_file_get_length(struct spdk_file *file); + +int spdk_file_write(struct spdk_file *file, struct spdk_io_channel *channel, + void *payload, uint64_t offset, uint64_t length); + +int64_t spdk_file_read(struct spdk_file *file, struct spdk_io_channel *channel, + void 
*payload, uint64_t offset, uint64_t length); + +void spdk_fs_set_cache_size(uint64_t size_in_mb); +uint64_t spdk_fs_get_cache_size(void); + +#define SPDK_FILE_PRIORITY_LOW 0 /* default */ +#define SPDK_FILE_PRIORITY_HIGH 1 + +void spdk_file_set_priority(struct spdk_file *file, uint32_t priority); + +int spdk_file_sync(struct spdk_file *file, struct spdk_io_channel *channel); + +#endif /* SPDK_FS_H */ diff --git a/lib/Makefile b/lib/Makefile index 073142786..2568fc3f5 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -34,7 +34,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/..) include $(SPDK_ROOT_DIR)/mk/spdk.common.mk -DIRS-y += bdev conf copy cunit event json jsonrpc \ +DIRS-y += bdev blob blobfs conf copy cunit event json jsonrpc \ log env_dpdk net rpc trace util nvme nvmf scsi ioat ifeq ($(OS),Linux) DIRS-y += iscsi diff --git a/lib/blob/Makefile b/lib/blob/Makefile index dd77b7f8d..4a6970f4e 100644 --- a/lib/blob/Makefile +++ b/lib/blob/Makefile @@ -38,4 +38,6 @@ CFLAGS += $(ENV_CFLAGS) C_SRCS = blobstore.c request.c LIBNAME = blob +DIRS-y += bdev + include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/lib/blob/bdev/Makefile b/lib/blob/bdev/Makefile new file mode 100644 index 000000000..dbc25dfb9 --- /dev/null +++ b/lib/blob/bdev/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = blob_bdev.c +LIBNAME = blob_bdev + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/lib/blob/bdev/blob_bdev.c b/lib/blob/bdev/blob_bdev.c new file mode 100644 index 000000000..82e8a2118 --- /dev/null +++ b/lib/blob/bdev/blob_bdev.c @@ -0,0 +1,167 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "spdk/blob_bdev.h" +#include "spdk/blob.h" +#include "spdk/bdev.h" +#include "spdk/io_channel.h" +#include "spdk/log.h" +#include "spdk/endian.h" + +struct blob_bdev { + struct spdk_bs_dev bs_dev; + struct spdk_bdev *bdev; +}; + +static inline struct spdk_bdev * +__get_bdev(struct spdk_bs_dev *dev) +{ + return ((struct blob_bdev *)dev)->bdev; +} + +static void +bdev_blob_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status, void *arg) +{ + struct spdk_bs_dev_cb_args *cb_args = arg; + int bserrno; + + if (status == SPDK_BDEV_IO_STATUS_SUCCESS) { + bserrno = 0; + } else { + bserrno = -EIO; + } + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, bserrno); + spdk_bdev_free_io(bdev_io); +} + +static void +bdev_blob_read(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload, + uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args) +{ + struct spdk_bdev *bdev = __get_bdev(dev); + struct spdk_bdev_io *bdev_io; + + bdev_io = spdk_bdev_read(bdev, channel, payload, lba * bdev->blocklen, + lba_count * bdev->blocklen, bdev_blob_io_complete, cb_args); + if (bdev_io == NULL) { + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EIO); + } +} + +static void +bdev_blob_write(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload, + uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args) +{ + struct spdk_bdev *bdev = __get_bdev(dev); + struct spdk_bdev_io *bdev_io; + + bdev_io = spdk_bdev_write(bdev, channel, payload, lba * bdev->blocklen, + lba_count * bdev->blocklen, bdev_blob_io_complete, cb_args); + if (bdev_io == NULL) { + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EIO); + } +} + +static void +bdev_blob_unmap(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, uint64_t lba, + uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args) +{ + struct spdk_bdev *bdev = __get_bdev(dev); + 
struct spdk_scsi_unmap_bdesc *desc; + struct spdk_bdev_io *bdev_io; + + SPDK_STATIC_ASSERT(sizeof(cb_args->scratch) >= sizeof(*desc), "scratch too small"); + + desc = (struct spdk_scsi_unmap_bdesc *)cb_args->scratch; + to_be64(&desc->lba, lba); + to_be32(&desc->block_count, lba_count); + desc->reserved = 0; + + bdev_io = spdk_bdev_unmap(bdev, channel, desc, 1, bdev_blob_io_complete, cb_args); + if (bdev_io == NULL) { + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EIO); + } +} + +static struct spdk_io_channel * +bdev_blob_create_channel(struct spdk_bs_dev *dev) +{ + struct spdk_bdev *bdev = __get_bdev(dev); + + return spdk_bdev_get_io_channel(bdev, SPDK_IO_PRIORITY_DEFAULT); +} + +static void +bdev_blob_destroy_channel(struct spdk_bs_dev *dev, struct spdk_io_channel *channel) +{ + spdk_put_io_channel(channel); +} + +static void +bdev_blob_destroy(struct spdk_bs_dev *bs_dev) +{ + free(bs_dev); +} + +struct spdk_bs_dev * +spdk_bdev_create_bs_dev(struct spdk_bdev *bdev) +{ + struct blob_bdev *b; + + b = calloc(1, sizeof(*b)); + + if (b == NULL) { + SPDK_ERRLOG("could not allocate blob_bdev\n"); + return NULL; + } + + b->bdev = bdev; + b->bs_dev.blockcnt = bdev->blockcnt; + b->bs_dev.blocklen = bdev->blocklen; + b->bs_dev.create_channel = bdev_blob_create_channel; + b->bs_dev.destroy_channel = bdev_blob_destroy_channel; + b->bs_dev.destroy = bdev_blob_destroy; + b->bs_dev.read = bdev_blob_read; + b->bs_dev.write = bdev_blob_write; + b->bs_dev.unmap = bdev_blob_unmap; + + return &b->bs_dev; +} diff --git a/lib/blobfs/Makefile b/lib/blobfs/Makefile new file mode 100644 index 000000000..ea36b6ab1 --- /dev/null +++ b/lib/blobfs/Makefile @@ -0,0 +1,40 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +C_SRCS = blobfs.c tree.c +LIBNAME = blobfs + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/lib/blobfs/blobfs.c b/lib/blobfs/blobfs.c new file mode 100644 index 000000000..70034fd03 --- /dev/null +++ b/lib/blobfs/blobfs.c @@ -0,0 +1,2209 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include "spdk/blobfs.h" +#include "blobfs_internal.h" + +#include "spdk/queue.h" +#include "spdk/io_channel.h" +#include "spdk/assert.h" +#include "spdk/env.h" +#include "spdk_internal/log.h" + +#define BLOBFS_TRACE(file, str, args...) \ + SPDK_TRACELOG(SPDK_TRACE_BLOBFS, "file=%s " str, file->name, ##args) + +#define BLOBFS_TRACE_RW(file, str, args...) 
\ + SPDK_TRACELOG(SPDK_TRACE_BLOBFS_RW, "file=%s " str, file->name, ##args) + +#define BLOBFS_CACHE_SIZE (4ULL * 1024 * 1024 * 1024) + +static uint64_t g_fs_cache_size = BLOBFS_CACHE_SIZE; +static struct spdk_mempool *g_cache_pool; +static TAILQ_HEAD(, spdk_file) g_caches; +static pthread_spinlock_t g_caches_lock; + +static void +__sem_post(void *arg, int bserrno) +{ + sem_t *sem = arg; + + sem_post(sem); +} + +void +spdk_cache_buffer_free(struct cache_buffer *cache_buffer) +{ + spdk_mempool_put(g_cache_pool, cache_buffer->buf); + free(cache_buffer); +} + +#define CACHE_READAHEAD_THRESHOLD (128 * 1024) + +struct spdk_file { + struct spdk_filesystem *fs; + struct spdk_blob *blob; + char *name; + uint64_t length; + bool open_for_writing; + uint64_t length_flushed; + uint64_t append_pos; + uint64_t seq_byte_count; + uint64_t next_seq_offset; + uint32_t priority; + TAILQ_ENTRY(spdk_file) tailq; + spdk_blob_id blobid; + uint32_t ref_count; + pthread_spinlock_t lock; + struct cache_buffer *last; + struct cache_tree *tree; + TAILQ_HEAD(open_requests_head, spdk_fs_request) open_requests; + TAILQ_HEAD(sync_requests_head, spdk_fs_request) sync_requests; + TAILQ_ENTRY(spdk_file) cache_tailq; +}; + +struct spdk_filesystem { + struct spdk_blob_store *bs; + TAILQ_HEAD(, spdk_file) files; + struct spdk_bs_opts bs_opts; + struct spdk_bs_dev *bdev; + fs_send_request_fn send_request; + struct spdk_io_channel *sync_io_channel; + struct spdk_fs_channel *sync_fs_channel; + struct spdk_io_channel *md_io_channel; + struct spdk_fs_channel *md_fs_channel; +}; + +struct spdk_fs_cb_args { + union { + spdk_fs_op_with_handle_complete fs_op_with_handle; + spdk_fs_op_complete fs_op; + spdk_file_op_with_handle_complete file_op_with_handle; + spdk_file_op_complete file_op; + spdk_file_stat_op_complete stat_op; + } fn; + void *arg; + sem_t *sem; + struct spdk_filesystem *fs; + struct spdk_file *file; + int rc; + bool from_request; + union { + struct { + uint64_t length; + } truncate; + struct { + 
struct spdk_io_channel *channel; + void *user_buf; + void *pin_buf; + int is_read; + off_t offset; + size_t length; + uint64_t start_page; + uint64_t num_pages; + uint32_t blocklen; + } rw; + struct { + const char *old_name; + const char *new_name; + } rename; + struct { + struct cache_buffer *cache_buffer; + uint64_t length; + } flush; + struct { + struct cache_buffer *cache_buffer; + uint64_t length; + uint64_t offset; + } readahead; + struct { + uint64_t offset; + TAILQ_ENTRY(spdk_fs_request) tailq; + } sync; + struct { + uint32_t num_clusters; + } resize; + struct { + const char *name; + uint32_t flags; + TAILQ_ENTRY(spdk_fs_request) tailq; + } open; + struct { + const char *name; + } create; + struct { + const char *name; + } delete; + struct { + const char *name; + } stat; + } op; +}; + +static void cache_free_buffers(struct spdk_file *file); + +static void +__initialize_cache(void) +{ + if (g_cache_pool != NULL) { + return; + } + + g_cache_pool = spdk_mempool_create("spdk_fs_cache", + g_fs_cache_size / CACHE_BUFFER_SIZE, + CACHE_BUFFER_SIZE, -1, SPDK_ENV_SOCKET_ID_ANY); + TAILQ_INIT(&g_caches); + pthread_spin_init(&g_caches_lock, 0); +} + +static uint64_t +__file_get_blob_size(struct spdk_file *file) +{ + uint64_t cluster_sz; + + cluster_sz = file->fs->bs_opts.cluster_sz; + return cluster_sz * spdk_blob_get_num_clusters(file->blob); +} + +struct spdk_fs_request { + struct spdk_fs_cb_args args; + TAILQ_ENTRY(spdk_fs_request) link; + struct spdk_fs_channel *channel; +}; + +struct spdk_fs_channel { + struct spdk_fs_request *req_mem; + TAILQ_HEAD(, spdk_fs_request) reqs; + sem_t sem; + struct spdk_filesystem *fs; + struct spdk_io_channel *bs_channel; + fs_send_request_fn send_request; +}; + +static struct spdk_fs_request * +alloc_fs_request(struct spdk_fs_channel *channel) +{ + struct spdk_fs_request *req; + + req = TAILQ_FIRST(&channel->reqs); + if (!req) { + return NULL; + } + TAILQ_REMOVE(&channel->reqs, req, link); + memset(req, 0, sizeof(*req)); + 
req->channel = channel; + req->args.from_request = true; + + return req; +} + +static void +free_fs_request(struct spdk_fs_request *req) +{ + TAILQ_INSERT_HEAD(&req->channel->reqs, req, link); +} + +static int +_spdk_fs_channel_create(void *io_device, uint32_t priority, void *ctx_buf, void *unique_ctx) +{ + struct spdk_filesystem *fs = io_device; + struct spdk_fs_channel *channel = ctx_buf; + uint32_t max_ops = *(uint32_t *)unique_ctx; + uint32_t i; + + channel->req_mem = calloc(max_ops, sizeof(struct spdk_fs_request)); + if (!channel->req_mem) { + free(channel); + return -1; + } + + TAILQ_INIT(&channel->reqs); + sem_init(&channel->sem, 0, 0); + + for (i = 0; i < max_ops; i++) { + TAILQ_INSERT_TAIL(&channel->reqs, &channel->req_mem[i], link); + } + + channel->fs = fs; + + return 0; +} + +static void +_spdk_fs_channel_destroy(void *io_device, void *ctx_buf) +{ + struct spdk_fs_channel *channel = ctx_buf; + + free(channel->req_mem); + if (channel->bs_channel != NULL) { + spdk_bs_free_io_channel(channel->bs_channel); + } +} + +static void +__send_request_direct(fs_request_fn fn, void *arg) +{ + fn(arg); +} + +static void +common_fs_bs_init(struct spdk_filesystem *fs, struct spdk_blob_store *bs) +{ + fs->bs = bs; + fs->bs_opts.cluster_sz = spdk_bs_get_cluster_size(bs); + fs->md_fs_channel->bs_channel = spdk_bs_alloc_io_channel(fs->bs, SPDK_IO_PRIORITY_DEFAULT, 512); + fs->md_fs_channel->send_request = __send_request_direct; + fs->sync_fs_channel->bs_channel = spdk_bs_alloc_io_channel(fs->bs, SPDK_IO_PRIORITY_DEFAULT, 512); + fs->sync_fs_channel->send_request = __send_request_direct; +} + +static void +init_cb(void *ctx, struct spdk_blob_store *bs, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_filesystem *fs = args->fs; + + if (bserrno == 0) { + common_fs_bs_init(fs, bs); + } else { + free(fs); + } + + args->fn.fs_op_with_handle(args->arg, fs, bserrno); + free_fs_request(req); +} + +static struct 
spdk_filesystem * +fs_alloc(struct spdk_bs_dev *dev, fs_send_request_fn send_request_fn) +{ + struct spdk_filesystem *fs; + uint32_t max_ops = 512; + + fs = calloc(1, sizeof(*fs)); + if (fs == NULL) { + return NULL; + } + + fs->bdev = dev; + fs->send_request = send_request_fn; + TAILQ_INIT(&fs->files); + spdk_io_device_register(fs, _spdk_fs_channel_create, _spdk_fs_channel_destroy, + sizeof(struct spdk_fs_channel)); + + fs->md_io_channel = spdk_get_io_channel(fs, SPDK_IO_PRIORITY_DEFAULT, true, (void *)&max_ops); + fs->md_fs_channel = spdk_io_channel_get_ctx(fs->md_io_channel); + + fs->sync_io_channel = spdk_get_io_channel(fs, SPDK_IO_PRIORITY_DEFAULT, true, (void *)&max_ops); + fs->sync_fs_channel = spdk_io_channel_get_ctx(fs->sync_io_channel); + + __initialize_cache(); + + return fs; +} + +void +spdk_fs_init(struct spdk_bs_dev *dev, fs_send_request_fn send_request_fn, + spdk_fs_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_filesystem *fs; + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + + fs = fs_alloc(dev, send_request_fn); + if (fs == NULL) { + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + req = alloc_fs_request(fs->md_fs_channel); + if (req == NULL) { + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + args = &req->args; + args->fn.fs_op_with_handle = cb_fn; + args->arg = cb_arg; + args->fs = fs; + + spdk_bs_init(dev, NULL, init_cb, req); +} + +static struct spdk_file * +file_alloc(struct spdk_filesystem *fs) +{ + struct spdk_file *file; + + file = calloc(1, sizeof(*file)); + if (file == NULL) { + return NULL; + } + + file->fs = fs; + TAILQ_INIT(&file->open_requests); + TAILQ_INIT(&file->sync_requests); + pthread_spin_init(&file->lock, 0); + file->tree = calloc(1, sizeof(*file->tree)); + TAILQ_INSERT_TAIL(&fs->files, file, tailq); + file->priority = SPDK_FILE_PRIORITY_LOW; + return file; +} + +static void +iter_cb(void *ctx, struct spdk_blob *blob, int rc) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = 
&req->args; + struct spdk_filesystem *fs = args->fs; + struct spdk_file *f; + uint64_t *length; + const char *name; + size_t value_len; + + if (rc == -ENOENT) { + /* Finished iterating */ + args->fn.fs_op_with_handle(args->arg, fs, 0); + free_fs_request(req); + return; + } else if (rc < 0) { + args->fn.fs_op_with_handle(args->arg, fs, rc); + free_fs_request(req); + return; + } + + rc = spdk_bs_md_get_xattr_value(blob, "name", (const void **)&name, &value_len); + if (rc < 0) { + args->fn.fs_op_with_handle(args->arg, fs, rc); + free_fs_request(req); + return; + } + + rc = spdk_bs_md_get_xattr_value(blob, "length", (const void **)&length, &value_len); + if (rc < 0) { + args->fn.fs_op_with_handle(args->arg, fs, rc); + free_fs_request(req); + return; + } + assert(value_len == 8); + + f = file_alloc(fs); + if (f == NULL) { + args->fn.fs_op_with_handle(args->arg, fs, -ENOMEM); + free_fs_request(req); + return; + } + + f->name = strdup(name); + f->blobid = spdk_blob_get_id(blob); + f->length = *length; + f->length_flushed = *length; + f->append_pos = *length; + SPDK_TRACELOG(SPDK_TRACE_BLOBFS, "added file %s length=%ju\n", f->name, f->length); + + spdk_bs_md_iter_next(fs->bs, &blob, iter_cb, req); +} + +static void +load_cb(void *ctx, struct spdk_blob_store *bs, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_filesystem *fs = args->fs; + + if (bserrno != 0) { + args->fn.fs_op_with_handle(args->arg, NULL, bserrno); + free_fs_request(req); + free(fs); + return; + } + + common_fs_bs_init(fs, bs); + spdk_bs_md_iter_first(fs->bs, iter_cb, req); +} + +void +spdk_fs_load(struct spdk_bs_dev *dev, fs_send_request_fn send_request_fn, + spdk_fs_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_filesystem *fs; + struct spdk_fs_cb_args *args; + struct spdk_fs_request *req; + + fs = fs_alloc(dev, send_request_fn); + if (fs == NULL) { + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + req = 
alloc_fs_request(fs->md_fs_channel); + if (req == NULL) { + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + args = &req->args; + args->fn.fs_op_with_handle = cb_fn; + args->arg = cb_arg; + args->fs = fs; + + spdk_bs_load(dev, load_cb, req); +} + +static void +unload_cb(void *ctx, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_filesystem *fs = args->fs; + + args->fn.fs_op(args->arg, bserrno); + free(req); + spdk_io_device_unregister(fs); + free(fs); +} + +void +spdk_fs_unload(struct spdk_filesystem *fs, spdk_fs_op_complete cb_fn, void *cb_arg) +{ + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + + /* + * We must free the md_channel before unloading the blobstore, so just + * allocate this request from the general heap. + */ + req = calloc(1, sizeof(*req)); + if (req == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + args = &req->args; + args->fn.fs_op = cb_fn; + args->arg = cb_arg; + args->fs = fs; + + spdk_fs_free_io_channel(fs->md_io_channel); + spdk_fs_free_io_channel(fs->sync_io_channel); + spdk_bs_unload(fs->bs, unload_cb, req); +} + +static struct spdk_file * +fs_find_file(struct spdk_filesystem *fs, const char *name) +{ + struct spdk_file *file; + + TAILQ_FOREACH(file, &fs->files, tailq) { + if (!strncmp(name, file->name, SPDK_FILE_NAME_MAX)) { + return file; + } + } + + return NULL; +} + +void +spdk_fs_file_stat_async(struct spdk_filesystem *fs, const char *name, + spdk_file_stat_op_complete cb_fn, void *cb_arg) +{ + struct spdk_file_stat stat; + struct spdk_file *f = NULL; + + if (strnlen(name, SPDK_FILE_NAME_MAX + 1) == SPDK_FILE_NAME_MAX + 1) { + cb_fn(cb_arg, NULL, -ENAMETOOLONG); + return; + } + + f = fs_find_file(fs, name); + if (f != NULL) { + stat.blobid = f->blobid; + stat.size = f->length; + cb_fn(cb_arg, &stat, 0); + return; + } + + cb_fn(cb_arg, NULL, -ENOENT); +} + +static void +__copy_stat(void *arg, struct spdk_file_stat *stat, int fserrno) +{ + struct 
spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + args->rc = fserrno; + if (fserrno == 0) { + memcpy(args->arg, stat, sizeof(*stat)); + } + sem_post(args->sem); +} + +static void +__file_stat(void *arg) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + spdk_fs_file_stat_async(args->fs, args->op.stat.name, + args->fn.stat_op, req); +} + +int +spdk_fs_file_stat(struct spdk_filesystem *fs, struct spdk_io_channel *_channel, + const char *name, struct spdk_file_stat *stat) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + struct spdk_fs_request *req; + int rc; + + req = alloc_fs_request(channel); + assert(req != NULL); + + req->args.fs = fs; + req->args.op.stat.name = name; + req->args.fn.stat_op = __copy_stat; + req->args.arg = stat; + req->args.sem = &channel->sem; + channel->send_request(__file_stat, req); + sem_wait(&channel->sem); + + rc = req->args.rc; + free_fs_request(req); + + return rc; +} + +static void +fs_create_blob_close_cb(void *ctx, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + + args->fn.file_op(args->arg, bserrno); + free_fs_request(req); +} + +static void +fs_create_blob_open_cb(void *ctx, struct spdk_blob *blob, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_file *f = args->file; + uint64_t length = 0; + + f->blob = blob; + spdk_bs_md_resize_blob(blob, 1); + spdk_blob_md_set_xattr(blob, "name", f->name, strlen(f->name) + 1); + spdk_blob_md_set_xattr(blob, "length", &length, sizeof(length)); + + spdk_bs_md_close_blob(&f->blob, fs_create_blob_close_cb, args); +} + +static void +fs_create_blob_create_cb(void *ctx, spdk_blob_id blobid, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_file *f = args->file; + + f->blobid = blobid; + spdk_bs_md_open_blob(f->fs->bs, blobid, 
fs_create_blob_open_cb, req); +} + +void +spdk_fs_create_file_async(struct spdk_filesystem *fs, const char *name, + spdk_file_op_complete cb_fn, void *cb_arg) +{ + struct spdk_file *file; + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + + if (strnlen(name, SPDK_FILE_NAME_MAX + 1) == SPDK_FILE_NAME_MAX + 1) { + cb_fn(cb_arg, -ENAMETOOLONG); + return; + } + + file = fs_find_file(fs, name); + if (file != NULL) { + cb_fn(cb_arg, -EEXIST); + return; + } + + file = file_alloc(fs); + if (file == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + req = alloc_fs_request(fs->md_fs_channel); + if (req == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + args = &req->args; + args->file = file; + args->fn.file_op = cb_fn; + args->arg = cb_arg; + + file->name = strdup(name); + spdk_bs_md_create_blob(fs->bs, fs_create_blob_create_cb, args); +} + +static void +__fs_create_file_done(void *arg, int fserrno) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + args->rc = fserrno; + sem_post(args->sem); + SPDK_TRACELOG(SPDK_TRACE_BLOBFS, "file=%s\n", args->op.create.name); +} + +static void +__fs_create_file(void *arg) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + SPDK_TRACELOG(SPDK_TRACE_BLOBFS, "file=%s\n", args->op.create.name); + spdk_fs_create_file_async(args->fs, args->op.create.name, __fs_create_file_done, req); +} + +int +spdk_fs_create_file(struct spdk_filesystem *fs, struct spdk_io_channel *_channel, const char *name) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + int rc; + + SPDK_TRACELOG(SPDK_TRACE_BLOBFS, "file=%s\n", name); + + req = alloc_fs_request(channel); + assert(req != NULL); + + args = &req->args; + args->fs = fs; + args->op.create.name = name; + args->sem = &channel->sem; + fs->send_request(__fs_create_file, req); + sem_wait(&channel->sem); + rc = args->rc; + 
free_fs_request(req); + + return rc; +} + +static void +fs_open_blob_done(void *ctx, struct spdk_blob *blob, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_file *f = args->file; + + f->blob = blob; + while (!TAILQ_EMPTY(&f->open_requests)) { + req = TAILQ_FIRST(&f->open_requests); + args = &req->args; + TAILQ_REMOVE(&f->open_requests, req, args.op.open.tailq); + args->fn.file_op_with_handle(args->arg, f, bserrno); + free_fs_request(req); + } +} + +static void +fs_open_blob_create_cb(void *ctx, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_file *file = args->file; + struct spdk_filesystem *fs = args->fs; + + if (file == NULL) { + file = fs_find_file(fs, args->op.open.name); + args->file = file; + } + + file->ref_count++; + TAILQ_INSERT_TAIL(&file->open_requests, req, args.op.open.tailq); + if (file->ref_count == 1) { + assert(file->blob == NULL); + spdk_bs_md_open_blob(fs->bs, file->blobid, fs_open_blob_done, req); + } else if (file->blob != NULL) { + fs_open_blob_done(req, file->blob, 0); + } else { + /* + * The blob open for this file is in progress due to a previous + * open request. When that open completes, it will invoke the + * open callback for this request. 
+ */ + } +} + +void +spdk_fs_open_file_async(struct spdk_filesystem *fs, const char *name, uint32_t flags, + spdk_file_op_with_handle_complete cb_fn, void *cb_arg) +{ + struct spdk_file *f = NULL; + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + + if (strnlen(name, SPDK_FILE_NAME_MAX + 1) == SPDK_FILE_NAME_MAX + 1) { + cb_fn(cb_arg, NULL, -ENAMETOOLONG); + return; + } + + f = fs_find_file(fs, name); + if (f == NULL && !(flags & SPDK_BLOBFS_OPEN_CREATE)) { + cb_fn(cb_arg, NULL, -ENOENT); + return; + } + + req = alloc_fs_request(fs->md_fs_channel); + if (req == NULL) { + cb_fn(cb_arg, NULL, -ENOMEM); + return; + } + + args = &req->args; + args->fn.file_op_with_handle = cb_fn; + args->arg = cb_arg; + args->file = f; + args->fs = fs; + args->op.open.name = name; + + if (f == NULL) { + spdk_fs_create_file_async(fs, name, fs_open_blob_create_cb, req); + } else { + fs_open_blob_create_cb(req, 0); + } +} + +static void +__fs_open_file_done(void *arg, struct spdk_file *file, int bserrno) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + args->file = file; + args->rc = bserrno; + sem_post(args->sem); + SPDK_TRACELOG(SPDK_TRACE_BLOBFS, "file=%s\n", args->op.open.name); +} + +static void +__fs_open_file(void *arg) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + SPDK_TRACELOG(SPDK_TRACE_BLOBFS, "file=%s\n", args->op.open.name); + spdk_fs_open_file_async(args->fs, args->op.open.name, args->op.open.flags, + __fs_open_file_done, req); +} + +int +spdk_fs_open_file(struct spdk_filesystem *fs, struct spdk_io_channel *_channel, + const char *name, uint32_t flags, struct spdk_file **file) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + int rc; + + SPDK_TRACELOG(SPDK_TRACE_BLOBFS, "file=%s\n", name); + + req = alloc_fs_request(channel); + assert(req != NULL); + + args = &req->args; + args->fs = fs; + 
args->op.open.name = name; + args->op.open.flags = flags; + args->sem = &channel->sem; + fs->send_request(__fs_open_file, req); + sem_wait(&channel->sem); + rc = args->rc; + if (rc == 0) { + *file = args->file; + } else { + *file = NULL; + } + free_fs_request(req); + + return rc; +} + +static void +fs_rename_blob_close_cb(void *ctx, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + + args->fn.fs_op(args->arg, bserrno); + free_fs_request(req); +} + +static void +fs_rename_blob_open_cb(void *ctx, struct spdk_blob *blob, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_file *f = args->file; + const char *new_name = args->op.rename.new_name; + + f->blob = blob; + spdk_blob_md_set_xattr(blob, "name", new_name, strlen(new_name) + 1); + spdk_bs_md_close_blob(&f->blob, fs_rename_blob_close_cb, req); +} + +static void +__spdk_fs_md_rename_file(struct spdk_fs_request *req) +{ + struct spdk_fs_cb_args *args = &req->args; + struct spdk_file *f; + + f = fs_find_file(args->fs, args->op.rename.old_name); + if (f == NULL) { + args->fn.fs_op(args->arg, -ENOENT); + free_fs_request(req); + return; + } + + free(f->name); + f->name = strdup(args->op.rename.new_name); + args->file = f; + spdk_bs_md_open_blob(args->fs->bs, f->blobid, fs_rename_blob_open_cb, req); +} + +static void +fs_rename_delete_done(void *arg, int fserrno) +{ + __spdk_fs_md_rename_file(arg); +} + +void +spdk_fs_rename_file_async(struct spdk_filesystem *fs, + const char *old_name, const char *new_name, + spdk_file_op_complete cb_fn, void *cb_arg) +{ + struct spdk_file *f; + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + + SPDK_TRACELOG(SPDK_TRACE_BLOBFS, "old=%s new=%s\n", old_name, new_name); + if (strnlen(new_name, SPDK_FILE_NAME_MAX + 1) == SPDK_FILE_NAME_MAX + 1) { + cb_fn(cb_arg, -ENAMETOOLONG); + return; + } + + req = alloc_fs_request(fs->md_fs_channel); + if (req == NULL) { + 
cb_fn(cb_arg, -ENOMEM); + return; + } + + args = &req->args; + args->fn.fs_op = cb_fn; + args->fs = fs; + args->arg = cb_arg; + args->op.rename.old_name = old_name; + args->op.rename.new_name = new_name; + + f = fs_find_file(fs, new_name); + if (f == NULL) { + __spdk_fs_md_rename_file(req); + return; + } + + /* + * The rename overwrites an existing file. So delete the existing file, then + * do the actual rename. + */ + spdk_fs_delete_file_async(fs, new_name, fs_rename_delete_done, req); +} + +static void +__fs_rename_file_done(void *arg, int fserrno) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + args->rc = fserrno; + sem_post(args->sem); +} + +static void +__fs_rename_file(void *arg) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + spdk_fs_rename_file_async(args->fs, args->op.rename.old_name, args->op.rename.new_name, + __fs_rename_file_done, req); +} + +int +spdk_fs_rename_file(struct spdk_filesystem *fs, struct spdk_io_channel *_channel, + const char *old_name, const char *new_name) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + int rc; + + req = alloc_fs_request(channel); + assert(req != NULL); + + args = &req->args; + + args->fs = fs; + args->op.rename.old_name = old_name; + args->op.rename.new_name = new_name; + args->sem = &channel->sem; + fs->send_request(__fs_rename_file, req); + sem_wait(&channel->sem); + rc = args->rc; + free_fs_request(req); + return rc; +} + +static void +blob_delete_cb(void *ctx, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + + args->fn.file_op(args->arg, bserrno); + free_fs_request(req); +} + +void +spdk_fs_delete_file_async(struct spdk_filesystem *fs, const char *name, + spdk_file_op_complete cb_fn, void *cb_arg) +{ + struct spdk_file *f; + spdk_blob_id blobid; + struct spdk_fs_request *req; + struct 
spdk_fs_cb_args *args; + + SPDK_TRACELOG(SPDK_TRACE_BLOBFS, "file=%s\n", name); + + if (strnlen(name, SPDK_FILE_NAME_MAX + 1) == SPDK_FILE_NAME_MAX + 1) { + cb_fn(cb_arg, -ENAMETOOLONG); + return; + } + + f = fs_find_file(fs, name); + if (f == NULL) { + cb_fn(cb_arg, -ENOENT); + return; + } + + if (f->ref_count > 0) { + /* For now, do not allow deleting files with open references. */ + cb_fn(cb_arg, -EBUSY); + return; + } + + req = alloc_fs_request(fs->md_fs_channel); + if (req == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + TAILQ_REMOVE(&fs->files, f, tailq); + + cache_free_buffers(f); + + blobid = f->blobid; + + free(f->name); + free(f->tree); + free(f); + + args = &req->args; + args->fn.file_op = cb_fn; + args->arg = cb_arg; + spdk_bs_md_delete_blob(fs->bs, blobid, blob_delete_cb, req); +} + +static void +__fs_delete_file_done(void *arg, int fserrno) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + args->rc = fserrno; + sem_post(args->sem); +} + +static void +__fs_delete_file(void *arg) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + spdk_fs_delete_file_async(args->fs, args->op.delete.name, __fs_delete_file_done, req); +} + +int +spdk_fs_delete_file(struct spdk_filesystem *fs, struct spdk_io_channel *_channel, + const char *name) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + int rc; + + req = alloc_fs_request(channel); + assert(req != NULL); + + args = &req->args; + args->fs = fs; + args->op.delete.name = name; + args->sem = &channel->sem; + fs->send_request(__fs_delete_file, req); + sem_wait(&channel->sem); + rc = args->rc; + free_fs_request(req); + + return rc; +} + +spdk_fs_iter +spdk_fs_iter_first(struct spdk_filesystem *fs) +{ + struct spdk_file *f; + + f = TAILQ_FIRST(&fs->files); + return f; +} + +spdk_fs_iter +spdk_fs_iter_next(spdk_fs_iter iter) +{ + struct spdk_file *f = 
iter; + + if (f == NULL) { + return NULL; + } + + f = TAILQ_NEXT(f, tailq); + return f; +} + +const char * +spdk_file_get_name(struct spdk_file *file) +{ + return file->name; +} + +uint64_t +spdk_file_get_length(struct spdk_file *file) +{ + assert(file != NULL); + SPDK_TRACELOG(SPDK_TRACE_BLOBFS, "file=%s length=0x%jx\n", file->name, file->length); + return file->length; +} + +static void +fs_truncate_complete_cb(void *ctx, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + + args->fn.file_op(args->arg, bserrno); + free_fs_request(req); +} + +static uint64_t +__bytes_to_clusters(uint64_t length, uint64_t cluster_sz) +{ + return (length + cluster_sz - 1) / cluster_sz; +} + +void +spdk_file_truncate_async(struct spdk_file *file, uint64_t length, + spdk_file_op_complete cb_fn, void *cb_arg) +{ + struct spdk_filesystem *fs; + size_t num_clusters; + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + + SPDK_TRACELOG(SPDK_TRACE_BLOBFS, "file=%s old=0x%jx new=0x%jx\n", file->name, file->length, length); + if (length == file->length) { + cb_fn(cb_arg, 0); + return; + } + + req = alloc_fs_request(file->fs->md_fs_channel); + if (req == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + args = &req->args; + args->fn.file_op = cb_fn; + args->arg = cb_arg; + args->file = file; + fs = file->fs; + + num_clusters = __bytes_to_clusters(length, fs->bs_opts.cluster_sz); + + spdk_bs_md_resize_blob(file->blob, num_clusters); + spdk_blob_md_set_xattr(file->blob, "length", &length, sizeof(length)); + + file->length = length; + if (file->append_pos > file->length) { + file->append_pos = file->length; + } + + spdk_bs_md_sync_blob(file->blob, fs_truncate_complete_cb, args); +} + +static void +__truncate(void *arg) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + spdk_file_truncate_async(args->file, args->op.truncate.length, + args->fn.file_op, args->arg); +} + +void 
+spdk_file_truncate(struct spdk_file *file, struct spdk_io_channel *_channel, + uint64_t length) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + + req = alloc_fs_request(channel); + assert(req != NULL); + + args = &req->args; + + args->file = file; + args->op.truncate.length = length; + args->fn.file_op = __sem_post; + args->arg = &channel->sem; + + channel->send_request(__truncate, req); + sem_wait(&channel->sem); + free_fs_request(req); +} + +static void +__rw_done(void *ctx, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + + spdk_free(args->op.rw.pin_buf); + args->fn.file_op(args->arg, bserrno); + free_fs_request(req); +} + +static void +__read_done(void *ctx, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + + if (args->op.rw.is_read) { + memcpy(args->op.rw.user_buf, + args->op.rw.pin_buf + (args->op.rw.offset & 0xFFF), + args->op.rw.length); + __rw_done(req, 0); + } else { + memcpy(args->op.rw.pin_buf + (args->op.rw.offset & 0xFFF), + args->op.rw.user_buf, + args->op.rw.length); + spdk_bs_io_write_blob(args->file->blob, args->op.rw.channel, + args->op.rw.pin_buf, + args->op.rw.start_page, args->op.rw.num_pages, + __rw_done, req); + } +} + +static void +__do_blob_read(void *ctx, int fserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + + spdk_bs_io_read_blob(args->file->blob, args->op.rw.channel, + args->op.rw.pin_buf, + args->op.rw.start_page, args->op.rw.num_pages, + __read_done, req); +} + +static void +__get_page_parameters(struct spdk_file *file, uint64_t offset, uint64_t length, + uint64_t *start_page, uint32_t *page_size, uint64_t *num_pages) +{ + uint64_t end_page; + + *page_size = spdk_bs_get_page_size(file->fs->bs); + *start_page = offset / *page_size; + end_page = (offset + length - 1) / *page_size; + *num_pages = 
(end_page - *start_page + 1); +} + +static void +__readwrite(struct spdk_file *file, struct spdk_io_channel *_channel, + void *payload, uint64_t offset, uint64_t length, + spdk_file_op_complete cb_fn, void *cb_arg, int is_read) +{ + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + uint64_t start_page, num_pages, pin_buf_length; + uint32_t page_size; + + if (is_read && offset + length > file->length) { + cb_fn(cb_arg, -EINVAL); + return; + } + + req = alloc_fs_request(channel); + if (req == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + args = &req->args; + args->fn.file_op = cb_fn; + args->arg = cb_arg; + args->file = file; + args->op.rw.channel = channel->bs_channel; + args->op.rw.user_buf = payload; + args->op.rw.is_read = is_read; + args->op.rw.offset = offset; + args->op.rw.length = length; + + __get_page_parameters(file, offset, length, &start_page, &page_size, &num_pages); + pin_buf_length = num_pages * page_size; + args->op.rw.pin_buf = spdk_malloc(pin_buf_length, 4096, NULL); + + args->op.rw.start_page = start_page; + args->op.rw.num_pages = num_pages; + + if (!is_read && file->length < offset + length) { + spdk_file_truncate_async(file, offset + length, __do_blob_read, req); + } else { + __do_blob_read(req, 0); + } +} + +void +spdk_file_write_async(struct spdk_file *file, struct spdk_io_channel *channel, + void *payload, uint64_t offset, uint64_t length, + spdk_file_op_complete cb_fn, void *cb_arg) +{ + __readwrite(file, channel, payload, offset, length, cb_fn, cb_arg, 0); +} + +void +spdk_file_read_async(struct spdk_file *file, struct spdk_io_channel *channel, + void *payload, uint64_t offset, uint64_t length, + spdk_file_op_complete cb_fn, void *cb_arg) +{ + SPDK_TRACELOG(SPDK_TRACE_BLOBFS, "file=%s offset=%jx length=%jx\n", + file->name, offset, length); + __readwrite(file, channel, payload, offset, length, cb_fn, cb_arg, 1); +} + +struct spdk_io_channel * 
+spdk_fs_alloc_io_channel(struct spdk_filesystem *fs, uint32_t priority) +{ + struct spdk_io_channel *io_channel; + struct spdk_fs_channel *fs_channel; + uint32_t max_ops = 512; + + io_channel = spdk_get_io_channel(fs, priority, true, (void *)&max_ops); + fs_channel = spdk_io_channel_get_ctx(io_channel); + fs_channel->bs_channel = spdk_bs_alloc_io_channel(fs->bs, SPDK_IO_PRIORITY_DEFAULT, 512); + fs_channel->send_request = __send_request_direct; + + return io_channel; +} + +struct spdk_io_channel * +spdk_fs_alloc_io_channel_sync(struct spdk_filesystem *fs, uint32_t priority) +{ + struct spdk_io_channel *io_channel; + struct spdk_fs_channel *fs_channel; + uint32_t max_ops = 16; + + io_channel = spdk_get_io_channel(fs, priority, true, (void *)&max_ops); + fs_channel = spdk_io_channel_get_ctx(io_channel); + fs_channel->send_request = fs->send_request; + + return io_channel; +} + +void +spdk_fs_free_io_channel(struct spdk_io_channel *channel) +{ + spdk_put_io_channel(channel); +} + +void +spdk_fs_set_cache_size(uint64_t size_in_mb) +{ + g_fs_cache_size = size_in_mb * 1024 * 1024; +} + +uint64_t +spdk_fs_get_cache_size(void) +{ + return g_fs_cache_size / (1024 * 1024); +} + +static void __file_flush(void *_args); + +static void * +alloc_cache_memory_buffer(struct spdk_file *context) +{ + struct spdk_file *file; + void *buf; + + buf = spdk_mempool_get(g_cache_pool); + if (buf != NULL) { + return buf; + } + + pthread_spin_lock(&g_caches_lock); + TAILQ_FOREACH(file, &g_caches, cache_tailq) { + if (!file->open_for_writing && + file->priority == SPDK_FILE_PRIORITY_LOW && + file != context) { + TAILQ_REMOVE(&g_caches, file, cache_tailq); + TAILQ_INSERT_TAIL(&g_caches, file, cache_tailq); + break; + } + } + pthread_spin_unlock(&g_caches_lock); + if (file != NULL) { + cache_free_buffers(file); + buf = spdk_mempool_get(g_cache_pool); + if (buf != NULL) { + return buf; + } + } + + pthread_spin_lock(&g_caches_lock); + TAILQ_FOREACH(file, &g_caches, cache_tailq) { + if 
(!file->open_for_writing && file != context) { + TAILQ_REMOVE(&g_caches, file, cache_tailq); + TAILQ_INSERT_TAIL(&g_caches, file, cache_tailq); + break; + } + } + pthread_spin_unlock(&g_caches_lock); + if (file != NULL) { + cache_free_buffers(file); + buf = spdk_mempool_get(g_cache_pool); + if (buf != NULL) { + return buf; + } + } + + pthread_spin_lock(&g_caches_lock); + TAILQ_FOREACH(file, &g_caches, cache_tailq) { + if (file != context) { + TAILQ_REMOVE(&g_caches, file, cache_tailq); + TAILQ_INSERT_TAIL(&g_caches, file, cache_tailq); + break; + } + } + pthread_spin_unlock(&g_caches_lock); + if (file != NULL) { + cache_free_buffers(file); + buf = spdk_mempool_get(g_cache_pool); + if (buf != NULL) { + return buf; + } + } + + assert(false); + return NULL; +} + +static struct cache_buffer * +cache_insert_buffer(struct spdk_file *file, uint64_t offset) +{ + struct cache_buffer *buf; + int count = 0; + + buf = calloc(1, sizeof(*buf)); + if (buf == NULL) { + SPDK_TRACELOG(SPDK_TRACE_BLOBFS, "calloc failed\n"); + return NULL; + } + + buf->buf = alloc_cache_memory_buffer(file); + if (buf->buf == NULL) { + while (buf->buf == NULL) { + count++; + buf->buf = alloc_cache_memory_buffer(file); + /* + * TODO: __free_oldest_cache() should eventually free some buffers. + * Should have a more sophisticated check here, instead of just + * bailing if 100 tries does not result in getting a free buffer. 
+ */ + if (count == 100) { + SPDK_ERRLOG("could not allocate cache buffer\n"); + assert(false); + return NULL; + } + } + } + + buf->buf_size = CACHE_BUFFER_SIZE; + buf->offset = offset; + + pthread_spin_lock(&g_caches_lock); + if (file->tree->present_mask == 0) { + TAILQ_INSERT_TAIL(&g_caches, file, cache_tailq); + } + file->tree = spdk_tree_insert_buffer(file->tree, buf); + pthread_spin_unlock(&g_caches_lock); + + return buf; +} + +static struct cache_buffer * +cache_append_buffer(struct spdk_file *file) +{ + struct cache_buffer *last; + + assert(file->last == NULL || file->last->bytes_filled == file->last->buf_size); + assert((file->append_pos % CACHE_BUFFER_SIZE) == 0); + + last = cache_insert_buffer(file, file->append_pos); + if (last == NULL) { + SPDK_TRACELOG(SPDK_TRACE_BLOBFS, "cache_insert_buffer failed\n"); + return NULL; + } + + if (file->last != NULL) { + file->last->next = last; + } + file->last = last; + + return last; +} + +static void +__wake_caller(struct spdk_fs_cb_args *args) +{ + sem_post(args->sem); +} + +static void +__file_cache_finish_sync(struct spdk_file *file) +{ + struct spdk_fs_request *sync_req; + struct spdk_fs_cb_args *sync_args; + + pthread_spin_lock(&file->lock); + while (!TAILQ_EMPTY(&file->sync_requests)) { + sync_req = TAILQ_FIRST(&file->sync_requests); + sync_args = &sync_req->args; + if (sync_args->op.sync.offset > file->length_flushed) { + break; + } + BLOBFS_TRACE(file, "sync done offset=%jx\n", sync_args->op.sync.offset); + TAILQ_REMOVE(&file->sync_requests, sync_req, args.op.sync.tailq); + pthread_spin_unlock(&file->lock); + sync_args->fn.file_op(sync_args->arg, 0); + pthread_spin_lock(&file->lock); + free_fs_request(sync_req); + } + pthread_spin_unlock(&file->lock); +} + +static void +__file_cache_finish_sync_bs_cb(void *ctx, int bserrno) +{ + struct spdk_file *file = ctx; + + __file_cache_finish_sync(file); +} + +static void +__free_args(struct spdk_fs_cb_args *args) +{ + struct spdk_fs_request *req; + + if 
(!args->from_request) {
		free(args);
	} else {
		/* Depends on args being at the start of the spdk_fs_request structure. */
		req = (struct spdk_fs_request *)args;
		free_fs_request(req);
	}
}

/*
 * Completion callback for the blob write issued by __file_flush().
 *
 * Under file->lock it clears the buffer's in_progress flag and adds
 * op.flush.length to both the buffer's bytes_flushed and the file's
 * length_flushed (raising file->length if it had fallen behind).  If any
 * queued sync request is now covered by length_flushed, the "length" xattr
 * is updated and the blob metadata is synced, with the sync requests being
 * completed from that sync's callback; otherwise they are completed
 * directly.  Finally __file_flush() is called again with the same args to
 * continue flushing.  bserrno is not examined.
 */
static void
__file_flush_done(void *arg, int bserrno)
{
	struct spdk_fs_cb_args *args = arg;
	struct spdk_fs_request *sync_req;
	struct spdk_file *file = args->file;
	struct cache_buffer *next = args->op.flush.cache_buffer;

	BLOBFS_TRACE(file, "length=%jx\n", args->op.flush.length);

	pthread_spin_lock(&file->lock);
	next->in_progress = false;
	next->bytes_flushed += args->op.flush.length;
	file->length_flushed += args->op.flush.length;
	if (file->length_flushed > file->length) {
		file->length = file->length_flushed;
	}
	if (next->bytes_flushed == next->buf_size) {
		BLOBFS_TRACE(file, "write buffer fully flushed 0x%jx\n", file->length_flushed);
		/* This buffer is done; look up the one that starts at the new flushed length. */
		next = spdk_tree_find_buffer(file->tree, file->length_flushed);
	}

	/* Search from the newest request back; sync_req ends up NULL if none is satisfied yet. */
	TAILQ_FOREACH_REVERSE(sync_req, &file->sync_requests, sync_requests_head, args.op.sync.tailq) {
		if (sync_req->args.op.sync.offset <= file->length_flushed) {
			break;
		}
	}

	if (sync_req != NULL) {
		BLOBFS_TRACE(file, "set xattr length 0x%jx\n", file->length_flushed);
		spdk_blob_md_set_xattr(file->blob, "length", &file->length_flushed,
				       sizeof(file->length_flushed));

		pthread_spin_unlock(&file->lock);
		spdk_bs_md_sync_blob(file->blob, __file_cache_finish_sync_bs_cb, file);
	} else {
		pthread_spin_unlock(&file->lock);
		__file_cache_finish_sync(file);
	}

	/*
	 * Assert that there is no cached data that extends past the end of the underlying
	 *  blob.
	 */
	assert(next == NULL || next->offset < __file_get_blob_size(file) ||
	       next->bytes_filled == 0);

	__file_flush(args);
}

/*
 * Write the not-yet-flushed bytes of the cache buffer located at
 * file->length_flushed to the blob.
 *
 * If there is no such buffer, a flush of it is already in progress, or it
 * holds no unflushed bytes, the args are freed and the function returns.
 * Otherwise the buffer is marked in_progress and the write is submitted with
 * __file_flush_done() as its completion callback, which takes over the args.
 */
static void
__file_flush(void *_args)
{
	struct spdk_fs_cb_args *args = _args;
	struct spdk_file *file = args->file;
	struct cache_buffer *next;
	uint64_t offset, length, start_page, num_pages;
	uint32_t page_size;

	pthread_spin_lock(&file->lock);
	next = spdk_tree_find_buffer(file->tree, file->length_flushed);
	if (next == NULL || next->in_progress) {
		/*
		 * There is either no data to flush, or a flush I/O is already in
		 *  progress.  So return immediately - if a flush I/O is in
		 *  progress we will flush more data after that is completed.
		 */
		__free_args(args);
		pthread_spin_unlock(&file->lock);
		return;
	}

	offset = next->offset + next->bytes_flushed;
	length = next->bytes_filled - next->bytes_flushed;
	if (length == 0) {
		/* Everything filled so far has already been written out. */
		__free_args(args);
		pthread_spin_unlock(&file->lock);
		return;
	}
	/* Remember what is being written so __file_flush_done() can account for it. */
	args->op.flush.length = length;
	args->op.flush.cache_buffer = next;

	__get_page_parameters(file, offset, length, &start_page, &page_size, &num_pages);

	next->in_progress = true;
	BLOBFS_TRACE(file, "offset=%jx length=%jx page start=%jx num=%jx\n",
		     offset, length, start_page, num_pages);
	pthread_spin_unlock(&file->lock);
	/* The source pointer is the byte offset of start_page relative to the start of this buffer. */
	spdk_bs_io_write_blob(file->blob, file->fs->sync_fs_channel->bs_channel,
			      next->buf + (start_page * page_size) - next->offset,
			      start_page, num_pages,
			      __file_flush_done, args);
}

/* Completion callback for the metadata sync in __file_extend_blob(): wakes the waiter; bserrno is not examined. */
static void
__file_extend_done(void *arg, int bserrno)
{
	struct spdk_fs_cb_args *args = arg;

	__wake_caller(args);
}

/*
 * Resize the file's blob to args->op.resize.num_clusters and sync its
 * metadata; __file_extend_done() runs when the sync completes.  The return
 * value of spdk_bs_md_resize_blob() is not checked here.
 */
static void
__file_extend_blob(void *_args)
{
	struct spdk_fs_cb_args *args = _args;
	struct spdk_file *file = args->file;

	spdk_bs_md_resize_blob(file->blob, args->op.resize.num_clusters);

	spdk_bs_md_sync_blob(file->blob, __file_extend_done, args);
}

/* Completion callback for __rw_from_file(): wakes the waiter, then frees the args. */
static void
__rw_from_file_done(void *arg, int bserrno)
{
	struct spdk_fs_cb_args *args = arg;

__wake_caller(args); + __free_args(args); +} + +static void +__rw_from_file(void *_args) +{ + struct spdk_fs_cb_args *args = _args; + struct spdk_file *file = args->file; + + if (args->op.rw.is_read) { + spdk_file_read_async(file, file->fs->sync_io_channel, args->op.rw.user_buf, + args->op.rw.offset, args->op.rw.length, + __rw_from_file_done, args); + } else { + spdk_file_write_async(file, file->fs->sync_io_channel, args->op.rw.user_buf, + args->op.rw.offset, args->op.rw.length, + __rw_from_file_done, args); + } +} + +static int +__send_rw_from_file(struct spdk_file *file, sem_t *sem, void *payload, + uint64_t offset, uint64_t length, bool is_read) +{ + struct spdk_fs_cb_args *args; + + args = calloc(1, sizeof(*args)); + if (args == NULL) { + sem_post(sem); + return -ENOMEM; + } + + args->file = file; + args->sem = sem; + args->op.rw.user_buf = payload; + args->op.rw.offset = offset; + args->op.rw.length = length; + args->op.rw.is_read = is_read; + file->fs->send_request(__rw_from_file, args); + return 0; +} + +int +spdk_file_write(struct spdk_file *file, struct spdk_io_channel *_channel, + void *payload, uint64_t offset, uint64_t length) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + struct spdk_fs_cb_args *args; + uint64_t rem_length, copy, blob_size, cluster_sz; + uint32_t cache_buffers_filled = 0; + uint8_t *cur_payload; + struct cache_buffer *last; + + BLOBFS_TRACE_RW(file, "offset=%jx length=%jx\n", offset, length); + + if (length == 0) { + return 0; + } + + if (offset != file->append_pos) { + BLOBFS_TRACE(file, " error offset=%jx append_pos=%jx\n", offset, file->append_pos); + return -EINVAL; + } + + pthread_spin_lock(&file->lock); + file->open_for_writing = true; + + if (file->last == NULL) { + if (file->append_pos % CACHE_BUFFER_SIZE == 0) { + cache_append_buffer(file); + } else { + int rc; + + file->append_pos += length; + rc = __send_rw_from_file(file, &channel->sem, payload, + offset, length, false); + 
pthread_spin_unlock(&file->lock); + sem_wait(&channel->sem); + return rc; + } + } + + blob_size = __file_get_blob_size(file); + + if ((offset + length) > blob_size) { + struct spdk_fs_cb_args extend_args = {}; + + cluster_sz = file->fs->bs_opts.cluster_sz; + extend_args.sem = &channel->sem; + extend_args.op.resize.num_clusters = __bytes_to_clusters((offset + length), cluster_sz); + extend_args.file = file; + BLOBFS_TRACE(file, "start resize to %u clusters\n", extend_args.op.resize.num_clusters); + pthread_spin_unlock(&file->lock); + file->fs->send_request(__file_extend_blob, &extend_args); + sem_wait(&channel->sem); + } + + last = file->last; + rem_length = length; + cur_payload = payload; + while (rem_length > 0) { + copy = last->buf_size - last->bytes_filled; + if (copy > rem_length) { + copy = rem_length; + } + BLOBFS_TRACE_RW(file, " fill offset=%jx length=%jx\n", file->append_pos, copy); + memcpy(&last->buf[last->bytes_filled], cur_payload, copy); + file->append_pos += copy; + if (file->length < file->append_pos) { + file->length = file->append_pos; + } + cur_payload += copy; + last->bytes_filled += copy; + rem_length -= copy; + if (last->bytes_filled == last->buf_size) { + cache_buffers_filled++; + last = cache_append_buffer(file); + if (last == NULL) { + BLOBFS_TRACE(file, "nomem\n"); + pthread_spin_unlock(&file->lock); + return -ENOMEM; + } + } + } + + if (cache_buffers_filled == 0) { + pthread_spin_unlock(&file->lock); + return 0; + } + + args = calloc(1, sizeof(*args)); + if (args == NULL) { + pthread_spin_unlock(&file->lock); + return -ENOMEM; + } + + args->file = file; + file->fs->send_request(__file_flush, args); + pthread_spin_unlock(&file->lock); + return 0; +} + +static void +__readahead_done(void *arg, int bserrno) +{ + struct spdk_fs_cb_args *args = arg; + struct cache_buffer *cache_buffer = args->op.readahead.cache_buffer; + struct spdk_file *file = args->file; + + BLOBFS_TRACE(file, "offset=%jx\n", cache_buffer->offset); + + 
pthread_spin_lock(&file->lock); + cache_buffer->bytes_filled = args->op.readahead.length; + cache_buffer->bytes_flushed = args->op.readahead.length; + cache_buffer->in_progress = false; + pthread_spin_unlock(&file->lock); + + __free_args(args); +} + +static void +__readahead(void *_args) +{ + struct spdk_fs_cb_args *args = _args; + struct spdk_file *file = args->file; + uint64_t offset, length, start_page, num_pages; + uint32_t page_size; + + offset = args->op.readahead.offset; + length = args->op.readahead.length; + assert(length > 0); + + __get_page_parameters(file, offset, length, &start_page, &page_size, &num_pages); + + BLOBFS_TRACE(file, "offset=%jx length=%jx page start=%jx num=%jx\n", + offset, length, start_page, num_pages); + spdk_bs_io_read_blob(file->blob, file->fs->sync_fs_channel->bs_channel, + args->op.readahead.cache_buffer->buf, + start_page, num_pages, + __readahead_done, args); +} + +static uint64_t +__next_cache_buffer_offset(uint64_t offset) +{ + return (offset + CACHE_BUFFER_SIZE) & ~(CACHE_TREE_LEVEL_MASK(0)); +} + +static void +check_readahead(struct spdk_file *file, uint64_t offset) +{ + struct spdk_fs_cb_args *args; + + offset = __next_cache_buffer_offset(offset); + if (spdk_tree_find_buffer(file->tree, offset) != NULL || file->length <= offset) { + return; + } + + BLOBFS_TRACE(file, "offset=%jx\n", offset); + args = calloc(1, sizeof(*args)); + args->file = file; + args->op.readahead.offset = offset; + args->op.readahead.cache_buffer = cache_insert_buffer(file, offset); + args->op.readahead.cache_buffer->in_progress = true; + if (file->length < (offset + CACHE_BUFFER_SIZE)) { + args->op.readahead.length = file->length & (CACHE_BUFFER_SIZE - 1); + } else { + args->op.readahead.length = CACHE_BUFFER_SIZE; + } + file->fs->send_request(__readahead, args); +} + +static int +__file_read(struct spdk_file *file, void *payload, uint64_t offset, uint64_t length, sem_t *sem) +{ + struct cache_buffer *buf; + + buf = 
spdk_tree_find_filled_buffer(file->tree, offset); + if (buf == NULL) { + return __send_rw_from_file(file, sem, payload, offset, length, true); + } + + if ((offset + length) > (buf->offset + buf->bytes_filled)) { + length = buf->offset + buf->bytes_filled - offset; + } + BLOBFS_TRACE(file, "read %p offset=%ju length=%ju\n", payload, offset, length); + memcpy(payload, &buf->buf[offset - buf->offset], length); + if ((offset + length) % CACHE_BUFFER_SIZE == 0) { + pthread_spin_lock(&g_caches_lock); + spdk_tree_remove_buffer(file->tree, buf); + if (file->tree->present_mask == 0) { + TAILQ_REMOVE(&g_caches, file, cache_tailq); + } + pthread_spin_unlock(&g_caches_lock); + } + + sem_post(sem); + return 0; +} + +int64_t +spdk_file_read(struct spdk_file *file, struct spdk_io_channel *_channel, + void *payload, uint64_t offset, uint64_t length) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + uint64_t final_offset, final_length; + uint32_t sub_reads = 0; + int rc = 0; + + pthread_spin_lock(&file->lock); + + BLOBFS_TRACE_RW(file, "offset=%ju length=%ju\n", offset, length); + + file->open_for_writing = false; + + if (length == 0 || offset >= file->length) { + pthread_spin_unlock(&file->lock); + return 0; + } + + if (offset + length > file->length) { + length = file->length - offset; + } + + if (offset != file->next_seq_offset) { + file->seq_byte_count = 0; + } + file->seq_byte_count += length; + file->next_seq_offset = offset + length; + if (file->seq_byte_count >= CACHE_READAHEAD_THRESHOLD) { + check_readahead(file, offset); + check_readahead(file, offset + CACHE_BUFFER_SIZE); + } + + final_length = 0; + final_offset = offset + length; + while (offset < final_offset) { + length = NEXT_CACHE_BUFFER_OFFSET(offset) - offset; + if (length > (final_offset - offset)) { + length = final_offset - offset; + } + rc = __file_read(file, payload, offset, length, &channel->sem); + if (rc == 0) { + final_length += length; + } else { + break; + } + payload += 
length; + offset += length; + sub_reads++; + } + pthread_spin_unlock(&file->lock); + while (sub_reads-- > 0) { + sem_wait(&channel->sem); + } + if (rc == 0) { + return final_length; + } else { + return rc; + } +} + +static void +_file_sync(struct spdk_file *file, struct spdk_fs_channel *channel, + spdk_file_op_complete cb_fn, void *cb_arg) +{ + struct spdk_fs_request *sync_req; + struct spdk_fs_request *flush_req; + struct spdk_fs_cb_args *sync_args; + struct spdk_fs_cb_args *flush_args; + + BLOBFS_TRACE(file, "offset=%jx\n", file->append_pos); + + pthread_spin_lock(&file->lock); + if (file->append_pos <= file->length_flushed || file->last == NULL) { + BLOBFS_TRACE(file, "done - no data to flush\n"); + pthread_spin_unlock(&file->lock); + cb_fn(cb_arg, 0); + return; + } + + sync_req = alloc_fs_request(channel); + assert(sync_req != NULL); + sync_args = &sync_req->args; + + flush_req = alloc_fs_request(channel); + assert(flush_req != NULL); + flush_args = &flush_req->args; + + sync_args->file = file; + sync_args->fn.file_op = cb_fn; + sync_args->arg = cb_arg; + sync_args->op.sync.offset = file->append_pos; + TAILQ_INSERT_TAIL(&file->sync_requests, sync_req, args.op.sync.tailq); + pthread_spin_unlock(&file->lock); + + flush_args->file = file; + channel->send_request(__file_flush, flush_args); +} + +int +spdk_file_sync(struct spdk_file *file, struct spdk_io_channel *_channel) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + + _file_sync(file, channel, __sem_post, &channel->sem); + sem_wait(&channel->sem); + + return 0; +} + +void +spdk_file_sync_async(struct spdk_file *file, struct spdk_io_channel *_channel, + spdk_file_op_complete cb_fn, void *cb_arg) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + + _file_sync(file, channel, cb_fn, cb_arg); +} + +void +spdk_file_set_priority(struct spdk_file *file, uint32_t priority) +{ + BLOBFS_TRACE(file, "priority=%u\n", priority); + file->priority = priority; + +} + +/* + * 
Close routines + */ + +static void +__file_close_async_done(void *ctx, int bserrno) +{ + struct spdk_fs_request *req = ctx; + struct spdk_fs_cb_args *args = &req->args; + + args->fn.file_op(args->arg, bserrno); + free_fs_request(req); +} + +static void +__file_close_async(struct spdk_file *file, struct spdk_fs_request *req) +{ + pthread_spin_lock(&file->lock); + if (file->ref_count == 0) { + pthread_spin_unlock(&file->lock); + __file_close_async_done(req, -EBADF); + return; + } + + file->ref_count--; + if (file->ref_count > 0) { + pthread_spin_unlock(&file->lock); + __file_close_async_done(req, 0); + return; + } + + pthread_spin_unlock(&file->lock); + + spdk_bs_md_close_blob(&file->blob, __file_close_async_done, req); +} + +static void +__file_close_async__sync_done(void *arg, int fserrno) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + + __file_close_async(args->file, req); +} + +void +spdk_file_close_async(struct spdk_file *file, spdk_file_op_complete cb_fn, void *cb_arg) +{ + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + + req = alloc_fs_request(file->fs->md_fs_channel); + if (req == NULL) { + cb_fn(cb_arg, -ENOMEM); + return; + } + + args = &req->args; + args->file = file; + args->fn.file_op = cb_fn; + args->arg = cb_arg; + + spdk_file_sync_async(file, file->fs->md_io_channel, __file_close_async__sync_done, req); +} + +static void +__file_close_done(void *arg, int fserrno) +{ + struct spdk_fs_cb_args *args = arg; + + args->rc = fserrno; + sem_post(args->sem); +} + +static void +__file_close(void *arg) +{ + struct spdk_fs_request *req = arg; + struct spdk_fs_cb_args *args = &req->args; + struct spdk_file *file = args->file; + + __file_close_async(file, req); +} + +int +spdk_file_close(struct spdk_file *file, struct spdk_io_channel *_channel) +{ + struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel); + struct spdk_fs_request *req; + struct spdk_fs_cb_args *args; + + req = 
alloc_fs_request(channel); + assert(req != NULL); + + args = &req->args; + + spdk_file_sync(file, _channel); + BLOBFS_TRACE(file, "name=%s\n", file->name); + args->file = file; + args->sem = &channel->sem; + args->fn.file_op = __file_close_done; + args->arg = req; + channel->send_request(__file_close, req); + sem_wait(&channel->sem); + + return args->rc; +} + +static void +cache_free_buffers(struct spdk_file *file) +{ + BLOBFS_TRACE(file, "free=%s\n", file->name); + pthread_spin_lock(&file->lock); + pthread_spin_lock(&g_caches_lock); + if (file->tree->present_mask == 0) { + pthread_spin_unlock(&g_caches_lock); + pthread_spin_unlock(&file->lock); + return; + } + spdk_tree_free_buffers(file->tree); + if (file->tree->present_mask == 0) { + TAILQ_REMOVE(&g_caches, file, cache_tailq); + } + file->last = NULL; + pthread_spin_unlock(&g_caches_lock); + pthread_spin_unlock(&file->lock); +} + +SPDK_LOG_REGISTER_TRACE_FLAG("blobfs", SPDK_TRACE_BLOBFS); +SPDK_LOG_REGISTER_TRACE_FLAG("blobfs_rw", SPDK_TRACE_BLOBFS_RW); diff --git a/lib/blobfs/blobfs_internal.h b/lib/blobfs/blobfs_internal.h new file mode 100644 index 000000000..b10cce9ff --- /dev/null +++ b/lib/blobfs/blobfs_internal.h @@ -0,0 +1,108 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SPDK_BLOBFS_INTERNAL_H +#define SPDK_BLOBFS_INTERNAL_H + +struct cache_tree; + +struct cache_buffer { + uint8_t *buf; + struct cache_buffer *next; + uint64_t offset; + uint32_t buf_size; + uint32_t bytes_filled; + uint32_t bytes_flushed; + bool in_progress; +}; + +#define CACHE_BUFFER_SIZE (256 * 1024) +#define CACHE_BUFFER_SHIFT (18) +#define NEXT_CACHE_BUFFER_OFFSET(offset) \ + (((offset + CACHE_BUFFER_SIZE) >> CACHE_BUFFER_SHIFT) << CACHE_BUFFER_SHIFT) + +#define CACHE_TREE_WIDTH 64 +#define CACHE_TREE_SHIFT 6 +#define CACHE_TREE_LEVEL_SHIFT(level) (CACHE_BUFFER_SHIFT + (level) * CACHE_TREE_SHIFT) +#define CACHE_TREE_LEVEL_SIZE(level) (1ULL << CACHE_TREE_LEVEL_SHIFT(level)) +#define CACHE_TREE_LEVEL_MASK(level) (CACHE_TREE_LEVEL_SIZE(level) - 1) +#define CACHE_TREE_INDEX(level, offset) ((offset >> CACHE_TREE_LEVEL_SHIFT(level)) & (CACHE_TREE_WIDTH - 1)) + +struct cache_tree { + uint8_t level; + uint64_t present_mask; + union { + struct cache_buffer *buffer[CACHE_TREE_WIDTH]; + struct cache_tree *tree[CACHE_TREE_WIDTH]; + } u; +}; + +void spdk_cache_buffer_free(struct cache_buffer *cache_buffer); + +struct cache_tree *spdk_tree_insert_buffer(struct cache_tree *root, struct cache_buffer *buffer); +void spdk_tree_free_buffers(struct cache_tree *tree); +struct cache_buffer *spdk_tree_find_buffer(struct cache_tree *tree, uint64_t offset); +struct cache_buffer *spdk_tree_find_filled_buffer(struct cache_tree *tree, uint64_t offset); +void spdk_tree_remove_buffer(struct cache_tree *tree, struct cache_buffer *buffer); + +void spdk_fs_file_stat_async(struct spdk_filesystem *fs, const char *name, + spdk_file_stat_op_complete cb_fn, void *cb_arg); +void spdk_fs_create_file_async(struct spdk_filesystem *fs, const char *name, + spdk_file_op_complete cb_fn, void *cb_args); +void spdk_fs_open_file_async(struct spdk_filesystem *fs, const char *name, uint32_t flags, + spdk_file_op_with_handle_complete cb_fn, void *cb_arg); +void spdk_file_close_async(struct 
spdk_file *file, spdk_file_op_complete cb_fn, void *cb_arg); +void spdk_fs_rename_file_async(struct spdk_filesystem *fs, const char *old_name, + const char *new_name, spdk_fs_op_complete cb_fn, + void *cb_arg); +void spdk_fs_delete_file_async(struct spdk_filesystem *fs, const char *name, + spdk_file_op_complete cb_fn, void *cb_arg); +void spdk_file_truncate_async(struct spdk_file *file, uint64_t length, + spdk_file_op_complete cb_fn, void *arg); +void spdk_file_write_async(struct spdk_file *file, struct spdk_io_channel *channel, + void *payload, uint64_t offset, uint64_t length, + spdk_file_op_complete cb_fn, void *cb_arg); +void spdk_file_read_async(struct spdk_file *file, struct spdk_io_channel *channel, + void *payload, uint64_t offset, uint64_t length, + spdk_file_op_complete cb_fn, void *cb_arg); + +/* Sync all dirty cache buffers to the backing block device. For async + * usage models, completion of the sync indicates only that data written + * when the sync command was issued have been flushed to disk - it does + * not guarantee any writes submitted after the sync have been flushed, + * even if those writes are completed before the sync. + */ +void spdk_file_sync_async(struct spdk_file *file, struct spdk_io_channel *channel, + spdk_file_op_complete cb_fn, void *cb_arg); + +#endif /* SPDK_BLOBFS_INTERNAL_H_ */ diff --git a/lib/blobfs/tree.c b/lib/blobfs/tree.c new file mode 100644 index 000000000..45c6aa467 --- /dev/null +++ b/lib/blobfs/tree.c @@ -0,0 +1,182 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "spdk/blobfs.h" +#include "blobfs_internal.h" + +#include "spdk/queue.h" +#include "spdk/assert.h" +#include "spdk/env.h" +#include "spdk_internal/log.h" + +struct cache_buffer * +spdk_tree_find_buffer(struct cache_tree *tree, uint64_t offset) +{ + uint64_t index; + + while (tree != NULL) { + index = offset / CACHE_TREE_LEVEL_SIZE(tree->level); + if (index >= CACHE_TREE_WIDTH) { + return NULL; + } + if (tree->level == 0) { + return tree->u.buffer[index]; + } else { + offset &= CACHE_TREE_LEVEL_MASK(tree->level); + tree = tree->u.tree[index]; + } + } + + return NULL; +} + +struct cache_buffer * +spdk_tree_find_filled_buffer(struct cache_tree *tree, uint64_t offset) +{ + struct cache_buffer *buf; + + buf = spdk_tree_find_buffer(tree, offset); + if (buf != NULL && buf->bytes_filled > 0) { + return buf; + } else { + return NULL; + } +} + +struct cache_tree * +spdk_tree_insert_buffer(struct cache_tree *root, struct cache_buffer *buffer) +{ + struct cache_tree *tree; + uint64_t index, offset; + + offset = buffer->offset; + while (offset >= CACHE_TREE_LEVEL_SIZE(root->level)) { + if (root->present_mask != 0) { + tree = calloc(1, sizeof(*tree)); + tree->level = root->level + 1; + tree->u.tree[0] = root; + root = tree; + root->present_mask = 0x1ULL; + } else { + root->level++; + } + } + + tree = root; + while (tree->level > 0) { + index = offset / CACHE_TREE_LEVEL_SIZE(tree->level); + offset &= CACHE_TREE_LEVEL_MASK(tree->level); + if (tree->u.tree[index] == NULL) { + tree->u.tree[index] = calloc(1, sizeof(*tree)); + tree->u.tree[index]->level = tree->level - 1; + tree->present_mask |= (1ULL << index); + } + tree = tree->u.tree[index]; + } + + index = offset / CACHE_BUFFER_SIZE; + assert(tree->u.buffer[index] == NULL); + tree->u.buffer[index] = buffer; + tree->present_mask |= (1ULL << index); + return root; +} + +void +spdk_tree_remove_buffer(struct cache_tree *tree, struct cache_buffer *buffer) 
+{ + struct cache_tree *child; + uint64_t index; + + index = CACHE_TREE_INDEX(tree->level, buffer->offset); + + if (tree->level == 0) { + assert(tree->u.buffer[index] != NULL); + assert(buffer == tree->u.buffer[index]); + tree->present_mask &= ~(1ULL << index); + tree->u.buffer[index] = NULL; + spdk_cache_buffer_free(buffer); + return; + } + + child = tree->u.tree[index]; + assert(child != NULL); + spdk_tree_remove_buffer(child, buffer); + if (child->present_mask == 0) { + tree->present_mask &= ~(1ULL << index); + tree->u.tree[index] = NULL; + free(child); + } +} + +void +spdk_tree_free_buffers(struct cache_tree *tree) +{ + struct cache_buffer *buffer; + struct cache_tree *child; + uint32_t i; + + if (tree->present_mask == 0) { + return; + } + + if (tree->level == 0) { + for (i = 0; i < CACHE_TREE_WIDTH; i++) { + buffer = tree->u.buffer[i]; + if (buffer != NULL && buffer->in_progress == false && + buffer->bytes_filled == buffer->bytes_flushed) { + spdk_cache_buffer_free(buffer); + tree->u.buffer[i] = NULL; + tree->present_mask &= ~(1ULL << i); + } + } + } else { + for (i = 0; i < CACHE_TREE_WIDTH; i++) { + child = tree->u.tree[i]; + if (child != NULL) { + spdk_tree_free_buffers(child); + if (child->present_mask == 0) { + free(child); + tree->u.tree[i] = NULL; + tree->present_mask &= ~(1ULL << i); + } + } + } + } +} diff --git a/test/blobfs/rocksdb/.gitignore b/test/blobfs/rocksdb/.gitignore new file mode 100644 index 000000000..1a06816d8 --- /dev/null +++ b/test/blobfs/rocksdb/.gitignore @@ -0,0 +1 @@ +results diff --git a/test/blobfs/rocksdb/common_flags.txt b/test/blobfs/rocksdb/common_flags.txt new file mode 100644 index 000000000..6390c7a40 --- /dev/null +++ b/test/blobfs/rocksdb/common_flags.txt @@ -0,0 +1,27 @@ +--disable_seek_compaction=1 +--mmap_read=0 +--statistics=1 +--histogram=1 +--key_size=16 +--value_size=1000 +--block_size=4096 +--cache_size=0 +--bloom_bits=10 +--cache_numshardbits=4 +--open_files=500000 +--verify_checksum=1 +--db=/mnt/rocksdb 
+--sync=0
+--compression_type=none
+--stats_interval=1000000
+--compression_ratio=1
+--disable_data_sync=0
+--target_file_size_base=67108864
+--max_write_buffer_number=3
+--max_bytes_for_level_multiplier=10
+--max_background_compactions=10
+--num_levels=10
+--delete_obsolete_files_period_micros=3000000
+--max_grandparent_overlap_factor=10
+--stats_per_interval=1
+--max_bytes_for_level_base=10485760
diff --git a/test/blobfs/rocksdb/postprocess.py b/test/blobfs/rocksdb/postprocess.py
new file mode 100755
index 000000000..ec18fc56f
--- /dev/null
+++ b/test/blobfs/rocksdb/postprocess.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+# Summarize the perf report and db_bench timing output of one test phase.
+# Usage: postprocess.py <results dir> <phase name>
+# Reads <results dir>/<phase name>.perf.txt and <phase name>_db_bench.txt.
+# Blank lines are printed with print("") so the output is the same under
+# Python 2 and Python 3 (a bare `print` is a no-op expression in Python 3).
+from collections import namedtuple
+from itertools import islice
+import operator
+import sys
+
+total_samples = 0
+thread_module_samples = {}
+function_module_samples = {}
+module_samples = {}
+threads = set()
+
+ThreadModule = namedtuple('ThreadModule', ['thread', 'module'])
+FunctionModule = namedtuple('FunctionModule', ['function', 'module'])
+
+with open(sys.argv[1] + "/" + sys.argv[2] + ".perf.txt") as f:
+    for line in f:
+        # fields[1] is the sample count, fields[2] the thread, fields[3] the
+        # module and fields[5] the function name.
+        fields = line.split()
+        total_samples += int(fields[1])
+        key = ThreadModule(fields[2], fields[3])
+        thread_module_samples.setdefault(key, 0)
+        thread_module_samples[key] += int(fields[1])
+        key = FunctionModule(fields[5], fields[3])
+        function_module_samples.setdefault(key, 0)
+        function_module_samples[key] += int(fields[1])
+        threads.add(fields[2])
+
+        key = fields[3]
+        module_samples.setdefault(key, 0)
+        module_samples[key] += int(fields[1])
+
+for thread in sorted(threads):
+    thread_pct = 0
+    print("")
+    print("Thread: {:s}".format(thread))
+    print(" Percent Module")
+    print("============================")
+    for key, value in sorted(thread_module_samples.items(), key=operator.itemgetter(1), reverse=True):
+        if key.thread == thread:
+            print("{:8.4f} {:20s}".format(float(value) * 100 / total_samples, key.module))
+            thread_pct += float(value) * 100 / total_samples
+    print("============================")
+    print("{:8.4f} Total".format(thread_pct))
+
+print("")
+print(" Percent Module Function")
+print("=================================================================")
+for key, value in islice(sorted(function_module_samples.items(), key=operator.itemgetter(1), reverse=True), 100):
+    print("{:8.4f} {:20s} {:s}".format(float(value) * 100 / total_samples, key.module, key.function))
+
+print("")
+print("")
+print(" Percent Module")
+print("=================================")
+for key, value in sorted(module_samples.items(), key=operator.itemgetter(1), reverse=True):
+    print("{:8.4f} {:s}".format(float(value) * 100 / total_samples, key))
+
+print("")
+with open(sys.argv[1] + "/" + sys.argv[2] + "_db_bench.txt") as f:
+    for line in f:
+        # Only the line containing "maxresident" carries the timing fields.
+        if "maxresident" in line:
+            fields = line.split()
+            print("Wall time elapsed: {:s}".format(fields[2].split("e")[0]))
+            print("CPU utilization: {:s}".format(fields[3].split('C')[0]))
+            user = float(fields[0].split('u')[0])
+            system = float(fields[1].split('s')[0])
+            print("User: {:8.2f} ({:5.2f}%)".format(user, user * 100 / (user + system)))
+            print("System: {:8.2f} ({:5.2f}%)".format(system, system * 100 / (user + system)))
+
+print("")
diff --git a/test/blobfs/rocksdb/rocksdb.sh b/test/blobfs/rocksdb/rocksdb.sh
new file mode 100755
index 000000000..2dfd74a81
--- /dev/null
+++ b/test/blobfs/rocksdb/rocksdb.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+
+testdir=$(readlink -f $(dirname $0))
+rootdir=$testdir/../../..
+source $rootdir/scripts/autotest_common.sh
+
+DB_BENCH_DIR=/usr/src/rocksdb
+DB_BENCH=$DB_BENCH_DIR/db_bench
+ROCKSDB_CONF=$testdir/rocksdb.conf
+
+if [ ! -e $DB_BENCH_DIR ]; then
+	echo $DB_BENCH_DIR does not exist, skipping rocksdb tests
+	exit 0
+fi
+
+timing_enter rocksdb
+
+timing_enter db_bench_build
+
+pushd $DB_BENCH_DIR
+$MAKE db_bench $MAKEFLAGS $MAKECONFIG DEBUG_LEVEL=0 SPDK_DIR=$rootdir DPDK_DIR=$DPDK_DIR
+popd
+
+timing_exit db_bench_build
+
+# Install the cleanup trap before the config file is created so that it is
+# removed even if one of the steps below fails or the run is interrupted.
+trap 'rm -f $ROCKSDB_CONF; exit 1' SIGINT SIGTERM EXIT
+
+cp $rootdir/etc/spdk/rocksdb.conf.in $ROCKSDB_CONF
+$rootdir/scripts/gen_nvme.sh >> $ROCKSDB_CONF
+
+$rootdir/test/lib/blobfs/mkfs/mkfs $ROCKSDB_CONF Nvme0n1
+DURATION=30 NUM_KEYS=50000000 ROCKSDB_CONF=$ROCKSDB_CONF CACHE_SIZE=1024 $testdir/run_tests.sh $DB_BENCH
+mkdir -p $output_dir/rocksdb
+cp $testdir/results/last/* $output_dir/rocksdb
+
+trap - SIGINT SIGTERM EXIT
+
+rm -f $ROCKSDB_CONF
+
+timing_exit rocksdb
diff --git a/test/blobfs/rocksdb/run_tests.sh b/test/blobfs/rocksdb/run_tests.sh
new file mode 100755
index 000000000..23e73abc2
--- /dev/null
+++ b/test/blobfs/rocksdb/run_tests.sh
@@ -0,0 +1,191 @@
+#!/bin/bash
+set -e
+
+if [ $# -eq 0 ]
+then
+	echo "usage: $0 <path to db_bench>"
+	exit 1
+fi
+
+DB_BENCH=$(readlink -f $1)
+[ -e $DB_BENCH ] || (echo "$DB_BENCH does not exist - needs to be built" && exit 1)
+
+hash mkfs.xfs
+# perf is optional: NO_PERF=1 records that it is not installed.
+if ! hash perf; then
+	NO_PERF=1
+fi
+hash python
+[ -e /usr/include/gflags/gflags.h ] || (echo "gflags not installed." && exit 1)
+
+# Increase max number of file descriptors. This will be inherited
+# by processes spawned from this script.
+ulimit -n 16384
+
+TESTDIR=$(readlink -f $(dirname $0))
+mkdir -p $TESTDIR/results/old
+# if there are any existing test results, move them into the "old" directory
+ls $TESTDIR/results/testrun_* &> /dev/null && mv $TESTDIR/results/testrun_* $TESTDIR/results/old
+
+RESULTS_DIR=$TESTDIR/results/testrun_`date +%Y%m%d_%H%M%S`
+mkdir $RESULTS_DIR
+rm -f $TESTDIR/results/last
+ln -s $RESULTS_DIR $TESTDIR/results/last
+
+: ${CACHE_SIZE:=4096}
+: ${DURATION:=120}
+: ${NUM_KEYS:=500000000}
+: ${ROCKSDB_CONF:=/usr/local/etc/spdk/rocksdb.conf}
+
+if [ "$NO_SPDK" = "1" ]
+then
+	[ -e /dev/nvme0n1 ] || (echo "No /dev/nvme0n1 device node found." && exit 1)
+else
+	[ -e /dev/nvme0n1 ] && (echo "/dev/nvme0n1 device found - need to run SPDK setup.sh script to bind to UIO." && exit 1)
+fi
+
+cd $RESULTS_DIR
+
+SYSINFO_FILE=$RESULTS_DIR/sysinfo.txt
+COMMAND="hostname"
+echo ">> $COMMAND : " >> $SYSINFO_FILE
+$COMMAND >> $SYSINFO_FILE
+echo >> $SYSINFO_FILE
+
+COMMAND="cat /proc/cpuinfo"
+echo ">> $COMMAND : " >> $SYSINFO_FILE
+$COMMAND >> $SYSINFO_FILE
+echo >> $SYSINFO_FILE
+
+COMMAND="cat /proc/meminfo"
+echo ">> $COMMAND : " >> $SYSINFO_FILE
+$COMMAND >> $SYSINFO_FILE
+echo >> $SYSINFO_FILE
+
+if [ "$NO_SPDK" = "1" ]
+then
+	echo -n Creating and mounting XFS filesystem...
+	sudo mkdir -p /mnt/rocksdb
+	# Silence umount itself; a failure here is tolerated via "|| true".
+	sudo umount /mnt/rocksdb &> /dev/null || true
+	sudo mkfs.xfs -d agcount=32 -l su=4096 -f /dev/nvme0n1 &> mkfs_xfs.txt
+	sudo mount -o discard /dev/nvme0n1 /mnt/rocksdb
+	sudo chown $USER /mnt/rocksdb
+	echo done.
+fi
+
+cp $TESTDIR/common_flags.txt insert_flags.txt
+echo "--benchmarks=fillseq" >> insert_flags.txt
+echo "--threads=1" >> insert_flags.txt
+echo "--disable_wal=1" >> insert_flags.txt
+echo "--use_existing_db=0" >> insert_flags.txt
+echo "--num=$NUM_KEYS" >> insert_flags.txt
+
+cp $TESTDIR/common_flags.txt randread_flags.txt
+echo "--benchmarks=readrandom" >> randread_flags.txt
+echo "--threads=16" >> randread_flags.txt
+echo "--duration=$DURATION" >> randread_flags.txt
+echo "--disable_wal=1" >> randread_flags.txt
+echo "--use_existing_db=1" >> randread_flags.txt
+echo "--num=$NUM_KEYS" >> randread_flags.txt
+
+cp $TESTDIR/common_flags.txt overwrite_flags.txt
+echo "--benchmarks=overwrite" >> overwrite_flags.txt
+echo "--threads=1" >> overwrite_flags.txt
+echo "--duration=$DURATION" >> overwrite_flags.txt
+echo "--disable_wal=1" >> overwrite_flags.txt
+echo "--use_existing_db=1" >> overwrite_flags.txt
+echo "--num=$NUM_KEYS" >> overwrite_flags.txt
+
+cp $TESTDIR/common_flags.txt readwrite_flags.txt
+echo "--benchmarks=readwhilewriting" >> readwrite_flags.txt
+echo "--threads=4" >> readwrite_flags.txt
+echo "--duration=$DURATION" >> readwrite_flags.txt
+echo "--disable_wal=1" >> readwrite_flags.txt
+echo "--use_existing_db=1" >> readwrite_flags.txt
+echo "--num=$NUM_KEYS" >> readwrite_flags.txt
+
+cp $TESTDIR/common_flags.txt writesync_flags.txt
+echo "--benchmarks=overwrite" >> writesync_flags.txt
+echo "--threads=1" >> writesync_flags.txt
+echo "--duration=$DURATION" >> writesync_flags.txt
+echo "--disable_wal=0" >> writesync_flags.txt
+echo "--use_existing_db=1" >> writesync_flags.txt
+echo "--sync=1" >> writesync_flags.txt
+echo "--num=$NUM_KEYS" >> writesync_flags.txt
+
+# Run one db_bench phase. $1 names the phase: flags are read from
+# $1_flags.txt and db_bench output goes to $1_db_bench.txt. When perf is
+# available the run is recorded and summarized into $1_summary.txt.
+run_step() {
+	if [ -z "$1" ]
+	then
+		echo run_step called with no parameter
+		exit 1
+	fi
+
+	if [ -z "$NO_SPDK" ]
+	then
+		echo "--spdk=$ROCKSDB_CONF" >> "$1"_flags.txt
+		echo "--spdk_bdev=Nvme0n1" >> "$1"_flags.txt
+		echo "--spdk_cache_size=$CACHE_SIZE" >> "$1"_flags.txt
+	fi
+
+	if [ "$NO_SPDK" = "1" ]
+	then
+		echo "--bytes_per_sync=262144" >> "$1"_flags.txt
+		cat /sys/block/nvme0n1/stat > "$1"_blockdev_stats.txt
+	fi
+
+	echo -n Start $1 test phase...
+	# NO_PERF=1 means perf is missing, so only profile when it is not set.
+	if [ "$NO_PERF" != "1" ]
+	then
+		sudo /usr/bin/time taskset 0xFFF perf record $DB_BENCH --flagfile="$1"_flags.txt &> "$1"_db_bench.txt
+	else
+		sudo /usr/bin/time taskset 0xFFF $DB_BENCH --flagfile="$1"_flags.txt &> "$1"_db_bench.txt
+	fi
+	echo done.
+
+	if [ "$NO_SPDK" = "1" ]
+	then
+		cat /sys/block/nvme0n1/stat >> "$1"_blockdev_stats.txt
+	fi
+
+	if [ "$NO_PERF" != "1" ]
+	then
+		echo -n Generating perf report for $1 test phase...
+		sudo perf report -f -n | sed '/#/d' | sed '/%/!d' | sort -r > $1.perf.txt
+		sudo rm perf.data
+		$TESTDIR/postprocess.py `pwd` $1 > $1_summary.txt
+		echo done.
+	fi
+}
+
+if [ -z "$SKIP_INSERT" ]
+then
+	run_step insert
+fi
+if [ -z "$SKIP_OVERWRITE" ]
+then
+	run_step overwrite
+fi
+if [ -z "$SKIP_READWRITE" ]
+then
+	run_step readwrite
+fi
+if [ -z "$SKIP_WRITESYNC" ]
+then
+	run_step writesync
+fi
+if [ -z "$SKIP_RANDREAD" ]
+then
+	run_step randread
+fi
+
+if [ "$NO_SPDK" = "1" ]
+then
+	echo -n Unmounting XFS filesystem...
+	sudo umount /mnt/rocksdb &> /dev/null || true
+	echo done.
+fi
diff --git a/test/lib/Makefile b/test/lib/Makefile
index 43ddd25d9..77ef47f3a 100644
--- a/test/lib/Makefile
+++ b/test/lib/Makefile
@@ -34,7 +34,7 @@
 SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
 include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
 
-DIRS-y = bdev blob env event log json jsonrpc nvme nvmf scsi ioat util
+DIRS-y = bdev blob blobfs env event log json jsonrpc nvme nvmf scsi ioat util
 ifeq ($(OS),Linux)
 DIRS-y += iscsi
 endif
diff --git a/test/lib/blobfs/Makefile b/test/lib/blobfs/Makefile
new file mode 100644
index 000000000..bd801aa2b
--- /dev/null
+++ b/test/lib/blobfs/Makefile
@@ -0,0 +1,49 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+DIRS-y = blobfs_ut cache_ut mkfs
+
+# TODO: do not check a hardcoded path here; the fuse test is only built when this fuse3 include directory exists
+ifneq (,$(wildcard /usr/local/include/fuse3))
+DIRS-y += fuse
+endif
+
+.PHONY: all clean $(DIRS-y)
+
+all: $(DIRS-y)
+clean: $(DIRS-y)
+
+include $(SPDK_ROOT_DIR)/mk/spdk.subdirs.mk
diff --git a/test/lib/blobfs/blobfs_ut/.gitignore b/test/lib/blobfs/blobfs_ut/.gitignore
new file mode 100644
index 000000000..4a7e1261e
--- /dev/null
+++ b/test/lib/blobfs/blobfs_ut/.gitignore
@@ -0,0 +1 @@
+blobfs_ut
diff --git a/test/lib/blobfs/blobfs_ut/Makefile b/test/lib/blobfs/blobfs_ut/Makefile
new file mode 100644
index 000000000..8b265d553
--- /dev/null
+++ b/test/lib/blobfs/blobfs_ut/Makefile
@@ -0,0 +1,56 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+include $(SPDK_ROOT_DIR)/mk/spdk.app.mk
+
+APP = blobfs_ut
+
+C_SRCS := blobfs_ut.c
+CFLAGS += -I$(SPDK_ROOT_DIR)/lib/blobfs -I$(SPDK_ROOT_DIR)/lib/blob
+CFLAGS += -I$(SPDK_ROOT_DIR)/test
+
+SPDK_LIB_LIST = blob log util
+
+LIBS += $(SPDK_LIB_LINKER_ARGS) -lcunit
+
+all : $(APP)
+
+$(APP) : $(OBJS) $(SPDK_LIB_FILES)
+	$(LINK_C)
+
+clean :
+	$(CLEAN_C) $(APP)
+
+include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk
diff --git a/test/lib/blobfs/blobfs_ut/blobfs_ut.c b/test/lib/blobfs/blobfs_ut/blobfs_ut.c
new file mode 100644
index 000000000..07dc07d6b
--- /dev/null
+++ b/test/lib/blobfs/blobfs_ut/blobfs_ut.c
@@ -0,0 +1,419 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* NOTE(review): the system header names were lost in extraction; restored by inference - confirm. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "CUnit/Basic.h"
+
+#include "lib/test_env.c"
+
+#include "spdk_cunit.h"
+#include "blobfs.c"
+#include "tree.c"
+
+#include "lib/blob/bs_dev_common.c"
+
+struct spdk_filesystem *g_fs;
+struct spdk_file *g_file;
+int g_fserrno;
+
+/* Completion callback that records the reported status in g_fserrno. */
+static void
+fs_op_complete(void *ctx, int fserrno)
+{
+	g_fserrno = fserrno;
+}
+
+/* Completion callback that records the filesystem handle and the status. */
+static void
+fs_op_with_handle_complete(void *ctx, struct spdk_filesystem *fs, int fserrno)
+{
+	g_fs = fs;
+	g_fserrno = fserrno;
+}
+
+/* Initialize a filesystem on the test device, then unload it. */
+static void
+fs_init(void)
+{
+	struct spdk_filesystem *fs;
+	struct spdk_bs_dev dev;
+
+	init_dev(&dev);
+	spdk_allocate_thread();
+
+	spdk_fs_init(&dev, NULL, fs_op_with_handle_complete, NULL);
+	SPDK_CU_ASSERT_FATAL(g_fs != NULL);
+	CU_ASSERT(g_fserrno == 0);
+	fs = g_fs;
+
+	spdk_fs_unload(fs, fs_op_complete, NULL);
+	CU_ASSERT(g_fserrno == 0);
+
+	spdk_free_thread();
+}
+
+/* Create-file completion callback; records the status in g_fserrno. */
+static void
+create_cb(void *ctx, int fserrno)
+{
+	g_fserrno = fserrno;
+}
+
+/* Open-file completion callback; records the status and the file handle. */
+static void
+open_cb(void *ctx, struct spdk_file *f, int fserrno)
+{
+	g_fserrno = fserrno;
+	g_file = f;
+}
+
+/* Delete-file completion callback; records the status in g_fserrno. */
+static void
+delete_cb(void *ctx, int fserrno)
+{
+	g_fserrno = fserrno;
+}
+
+/* Exercise open/create, iteration, close and delete of a single file. */
+static void
+fs_open(void)
+{
+	struct spdk_filesystem *fs;
+	spdk_fs_iter iter;
+	struct spdk_bs_dev dev;
+	struct spdk_file *file;
+
+	init_dev(&dev);
+	spdk_allocate_thread();
+
+	spdk_fs_init(&dev, NULL, fs_op_with_handle_complete, NULL);
+	SPDK_CU_ASSERT_FATAL(g_fs != NULL);
+	CU_ASSERT(g_fserrno == 0);
+	fs = g_fs;
+
+	g_fserrno = 0;
+	spdk_fs_open_file_async(fs, "file1", 0, open_cb, NULL);
+	CU_ASSERT(g_fserrno == -ENOENT);
+
+	g_file = NULL;
+	g_fserrno = 1;
+	spdk_fs_open_file_async(fs, "file1", SPDK_BLOBFS_OPEN_CREATE, open_cb, NULL);
+	CU_ASSERT(g_fserrno == 0);
+	SPDK_CU_ASSERT_FATAL(g_file != NULL);
+	CU_ASSERT(!strcmp("file1", g_file->name));
+	CU_ASSERT(g_file->ref_count == 1);
+
+	iter = spdk_fs_iter_first(fs);
+	CU_ASSERT(iter != NULL);
+	file = spdk_fs_iter_get_file(iter);
+	SPDK_CU_ASSERT_FATAL(file != NULL);
+	CU_ASSERT(!strcmp("file1", file->name));
+	iter = spdk_fs_iter_next(iter);
+	CU_ASSERT(iter == NULL);
+
+	g_fserrno = 0;
+	/* Delete should fail, since we have an open reference. */
+	spdk_fs_delete_file_async(fs, "file1", delete_cb, NULL);
+	CU_ASSERT(g_fserrno == -EBUSY);
+	CU_ASSERT(!TAILQ_EMPTY(&fs->files));
+
+	g_fserrno = 1;
+	spdk_file_close_async(g_file, fs_op_complete, NULL);
+	CU_ASSERT(g_fserrno == 0);
+	CU_ASSERT(g_file->ref_count == 0);
+
+	g_fserrno = 0;
+	spdk_file_close_async(g_file, fs_op_complete, NULL);
+	CU_ASSERT(g_fserrno == -EBADF);
+	CU_ASSERT(g_file->ref_count == 0);
+
+	g_fserrno = 1;
+	spdk_fs_delete_file_async(fs, "file1", delete_cb, NULL);
+	CU_ASSERT(g_fserrno == 0);
+	CU_ASSERT(TAILQ_EMPTY(&fs->files));
+
+	spdk_fs_unload(fs, fs_op_complete, NULL);
+	CU_ASSERT(g_fserrno == 0);
+
+	spdk_free_thread();
+}
+
+/* Grow, shrink and re-grow a file and check the reported length each time. */
+static void
+fs_truncate(void)
+{
+	struct spdk_filesystem *fs;
+	struct spdk_bs_dev dev;
+
+	init_dev(&dev);
+	spdk_allocate_thread();
+
+	spdk_fs_init(&dev, NULL, fs_op_with_handle_complete, NULL);
+	SPDK_CU_ASSERT_FATAL(g_fs != NULL);
+	CU_ASSERT(g_fserrno == 0);
+	fs = g_fs;
+
+	g_file = NULL;
+	g_fserrno = 1;
+	spdk_fs_open_file_async(fs, "file1", SPDK_BLOBFS_OPEN_CREATE, open_cb, NULL);
+	CU_ASSERT(g_fserrno == 0);
+	SPDK_CU_ASSERT_FATAL(g_file != NULL);
+
+	g_fserrno = 1;
+	spdk_file_truncate_async(g_file, 18 * 1024 * 1024 + 1, fs_op_complete, NULL);
+	CU_ASSERT(g_fserrno == 0);
+	CU_ASSERT(g_file->length == 18 * 1024 * 1024 + 1);
+
+	g_fserrno = 1;
+	spdk_file_truncate_async(g_file, 1, fs_op_complete, NULL);
+	CU_ASSERT(g_fserrno == 0);
+	CU_ASSERT(g_file->length == 1);
+
+	g_fserrno = 1;
+	spdk_file_truncate_async(g_file, 18 * 1024 * 1024 + 1, fs_op_complete, NULL);
+	CU_ASSERT(g_fserrno == 0);
+	CU_ASSERT(g_file->length == 18 * 1024 * 1024 + 1);
+
+	g_fserrno = 1;
+	spdk_file_close_async(g_file, fs_op_complete, NULL);
+	CU_ASSERT(g_fserrno == 0);
+	CU_ASSERT(g_file->ref_count == 0);
+
+	g_fserrno = 1;
+	spdk_fs_delete_file_async(fs, "file1", delete_cb, NULL);
+	CU_ASSERT(g_fserrno == 0);
+	CU_ASSERT(TAILQ_EMPTY(&fs->files));
+
+	spdk_fs_unload(fs, fs_op_complete, NULL);
+	CU_ASSERT(g_fserrno == 0);
+
+	spdk_free_thread();
+}
+
+/* Rename "file1" over an existing "file2" and check only one file remains. */
+static void
+fs_rename(void)
+{
+	struct spdk_filesystem *fs;
+	struct spdk_file *file, *file2;
+	struct spdk_bs_dev dev;
+
+	init_dev(&dev);
+	spdk_allocate_thread();
+
+	spdk_fs_init(&dev, NULL, fs_op_with_handle_complete, NULL);
+	SPDK_CU_ASSERT_FATAL(g_fs != NULL);
+	CU_ASSERT(g_fserrno == 0);
+	fs = g_fs;
+
+	g_fserrno = 1;
+	spdk_fs_create_file_async(fs, "file1", create_cb, NULL);
+	CU_ASSERT(g_fserrno == 0);
+
+	g_file = NULL;
+	g_fserrno = 1;
+	spdk_fs_open_file_async(fs, "file1", 0, open_cb, NULL);
+	CU_ASSERT(g_fserrno == 0);
+	SPDK_CU_ASSERT_FATAL(g_file != NULL);
+	CU_ASSERT(g_file->ref_count == 1);
+
+	file = g_file;
+	g_file = NULL;
+	g_fserrno = 1;
+	spdk_file_close_async(file, fs_op_complete, NULL);
+	CU_ASSERT(g_fserrno == 0);
+	SPDK_CU_ASSERT_FATAL(file->ref_count == 0);
+
+	g_file = NULL;
+	g_fserrno = 1;
+	spdk_fs_open_file_async(fs, "file2", SPDK_BLOBFS_OPEN_CREATE, open_cb, NULL);
+	CU_ASSERT(g_fserrno == 0);
+	SPDK_CU_ASSERT_FATAL(g_file != NULL);
+	CU_ASSERT(g_file->ref_count == 1);
+
+	file2 = g_file;
+	g_file = NULL;
+	g_fserrno = 1;
+	spdk_file_close_async(file2, fs_op_complete, NULL);
+	CU_ASSERT(g_fserrno == 0);
+	SPDK_CU_ASSERT_FATAL(file2->ref_count == 0);
+
+	/*
+	 * Do a 3-way rename. This should delete the old "file2", then rename
+	 * "file1" to "file2".
+	 */
+	g_fserrno = 1;
+	spdk_fs_rename_file_async(fs, "file1", "file2", fs_op_complete, NULL);
+	CU_ASSERT(g_fserrno == 0);
+	CU_ASSERT(file->ref_count == 0);
+	CU_ASSERT(!strcmp(file->name, "file2"));
+	CU_ASSERT(TAILQ_FIRST(&fs->files) == file);
+	CU_ASSERT(TAILQ_NEXT(file, tailq) == NULL);
+
+	g_fserrno = 0;
+	spdk_fs_delete_file_async(fs, "file1", delete_cb, NULL);
+	CU_ASSERT(g_fserrno == -ENOENT);
+	CU_ASSERT(!TAILQ_EMPTY(&fs->files));
+
+	g_fserrno = 1;
+	spdk_fs_delete_file_async(fs, "file2", delete_cb, NULL);
+	CU_ASSERT(g_fserrno == 0);
+	CU_ASSERT(TAILQ_EMPTY(&fs->files));
+
+	spdk_fs_unload(fs, fs_op_complete, NULL);
+	CU_ASSERT(g_fserrno == 0);
+
+	spdk_free_thread();
+}
+
+/* Build a two-level cache tree by hand and check buffer lookup and insert. */
+static void
+tree_find_buffer_ut(void)
+{
+	struct cache_tree *root;
+	struct cache_tree *level1_0;
+	struct cache_tree *level0_0_0;
+	struct cache_tree *level0_0_12;
+	struct cache_buffer *leaf_0_0_4;
+	struct cache_buffer *leaf_0_12_8;
+	struct cache_buffer *leaf_9_23_15;
+	struct cache_buffer *buffer;
+
+	level1_0 = calloc(1, sizeof(struct cache_tree));
+	SPDK_CU_ASSERT_FATAL(level1_0 != NULL);
+	level0_0_0 = calloc(1, sizeof(struct cache_tree));
+	SPDK_CU_ASSERT_FATAL(level0_0_0 != NULL);
+	level0_0_12 = calloc(1, sizeof(struct cache_tree));
+	SPDK_CU_ASSERT_FATAL(level0_0_12 != NULL);
+	leaf_0_0_4 = calloc(1, sizeof(struct cache_buffer));
+	SPDK_CU_ASSERT_FATAL(leaf_0_0_4 != NULL);
+	leaf_0_12_8 = calloc(1, sizeof(struct cache_buffer));
+	SPDK_CU_ASSERT_FATAL(leaf_0_12_8 != NULL);
+	leaf_9_23_15 = calloc(1, sizeof(struct cache_buffer));
+	SPDK_CU_ASSERT_FATAL(leaf_9_23_15 != NULL);
+
+	level1_0->level = 1;
+	level0_0_0->level = 0;
+	level0_0_12->level = 0;
+
+	leaf_0_0_4->offset = CACHE_BUFFER_SIZE * 4;
+	level0_0_0->u.buffer[4] = leaf_0_0_4;
+	level0_0_0->present_mask |= (1ULL << 4);
+
+	leaf_0_12_8->offset = CACHE_TREE_LEVEL_SIZE(1) * 12 + CACHE_BUFFER_SIZE * 8;
+	level0_0_12->u.buffer[8] = leaf_0_12_8;
+	level0_0_12->present_mask |= (1ULL << 8);
+
+	level1_0->u.tree[0] = level0_0_0;
+	level1_0->present_mask |= (1ULL << 0);
+	level1_0->u.tree[12] = level0_0_12;
+	level1_0->present_mask |= (1ULL << 12);
+
+	buffer = spdk_tree_find_buffer(NULL, 0);
+	CU_ASSERT(buffer == NULL);
+
+	buffer = spdk_tree_find_buffer(level0_0_0, 0);
+	CU_ASSERT(buffer == NULL);
+
+	buffer = spdk_tree_find_buffer(level0_0_0, CACHE_TREE_LEVEL_SIZE(0) + 1);
+	CU_ASSERT(buffer == NULL);
+
+	buffer = spdk_tree_find_buffer(level0_0_0, leaf_0_0_4->offset);
+	CU_ASSERT(buffer == leaf_0_0_4);
+
+	buffer = spdk_tree_find_buffer(level1_0, leaf_0_0_4->offset);
+	CU_ASSERT(buffer == leaf_0_0_4);
+
+	buffer = spdk_tree_find_buffer(level1_0, leaf_0_12_8->offset);
+	CU_ASSERT(buffer == leaf_0_12_8);
+
+	buffer = spdk_tree_find_buffer(level1_0, leaf_0_12_8->offset + CACHE_BUFFER_SIZE - 1);
+	CU_ASSERT(buffer == leaf_0_12_8);
+
+	buffer = spdk_tree_find_buffer(level1_0, leaf_0_12_8->offset - 1);
+	CU_ASSERT(buffer == NULL);
+
+	leaf_9_23_15->offset = CACHE_TREE_LEVEL_SIZE(2) * 9 +
+			       CACHE_TREE_LEVEL_SIZE(1) * 23 +
+			       CACHE_BUFFER_SIZE * 15;
+	root = spdk_tree_insert_buffer(level1_0, leaf_9_23_15);
+	CU_ASSERT(root != level1_0);
+	buffer = spdk_tree_find_buffer(root, leaf_9_23_15->offset);
+	CU_ASSERT(buffer == leaf_9_23_15);
+	spdk_tree_free_buffers(root);
+	free(root);
+}
+
+int main(int argc, char **argv)
+{
+	CU_pSuite suite = NULL;
+	unsigned int num_failures;
+
+	if (CU_initialize_registry() != CUE_SUCCESS) {
+		return CU_get_error();
+	}
+
+	suite = CU_add_suite("blobfs", NULL, NULL);
+	if (suite == NULL) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+
+	if (
+		CU_add_test(suite, "fs_init", fs_init) == NULL ||
+		CU_add_test(suite, "fs_open", fs_open) == NULL ||
+		CU_add_test(suite, "fs_truncate", fs_truncate) == NULL ||
+		CU_add_test(suite, "fs_rename", fs_rename) == NULL ||
+		CU_add_test(suite, "tree_find_buffer", tree_find_buffer_ut) == NULL
+	) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+
+	g_dev_buffer = calloc(1, DEV_BUFFER_SIZE);
+	if (g_dev_buffer == NULL) {
+		CU_cleanup_registry();
+		return 1;
+	}
+	CU_basic_set_mode(CU_BRM_VERBOSE);
+	CU_basic_run_tests();
+	num_failures = CU_get_number_of_failures();
+	CU_cleanup_registry();
+	free(g_dev_buffer);
+	return num_failures;
+}
diff --git a/test/lib/blobfs/cache_ut/.gitignore b/test/lib/blobfs/cache_ut/.gitignore
new
file mode 100644
index 000000000..32d6e308d
--- /dev/null
+++ b/test/lib/blobfs/cache_ut/.gitignore
@@ -0,0 +1 @@
+cache_ut
diff --git a/test/lib/blobfs/cache_ut/Makefile b/test/lib/blobfs/cache_ut/Makefile
new file mode 100644
index 000000000..7dab97add
--- /dev/null
+++ b/test/lib/blobfs/cache_ut/Makefile
@@ -0,0 +1,56 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+include $(SPDK_ROOT_DIR)/mk/spdk.app.mk
+
+APP = cache_ut
+
+C_SRCS := cache_ut.c
+CFLAGS += -I$(SPDK_ROOT_DIR)/lib/blobfs -I$(SPDK_ROOT_DIR)/lib/blob
+CFLAGS += -I$(SPDK_ROOT_DIR)/test
+
+SPDK_LIB_LIST = util log blob util
+
+LIBS += $(SPDK_LIB_LINKER_ARGS) -lcunit
+
+all : $(APP)
+
+$(APP) : $(OBJS) $(SPDK_LIB_FILES)
+	$(LINK_C)
+
+clean :
+	$(CLEAN_C) $(APP)
+
+include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk
diff --git a/test/lib/blobfs/cache_ut/cache_ut.c b/test/lib/blobfs/cache_ut/cache_ut.c
new file mode 100644
index 000000000..d7fa667c5
--- /dev/null
+++ b/test/lib/blobfs/cache_ut/cache_ut.c
@@ -0,0 +1,307 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* NOTE(review): the system header names were lost in extraction; restored by inference - confirm. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <errno.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <unistd.h>
+
+#include "spdk/blobfs.h"
+#include "spdk/env.h"
+#include "spdk/log.h"
+#include "spdk/io_channel.h"
+#include "spdk/barrier.h"
+
+#include "spdk_cunit.h"
+#include "lib/blob/bs_dev_common.c"
+#include "lib/test_env.c"
+#include "blobfs.c"
+#include "tree.c"
+
+struct spdk_filesystem *g_fs;
+struct spdk_file *g_file;
+int g_fserrno;
+
+struct spdk_bs_dev g_dev;
+
+sem_t g_sem;
+
+/* One unit of work handed to spdk_thread(); from_ut marks stack-allocated requests. */
+struct ut_request {
+	fs_request_fn fn;
+	void *arg;
+	volatile int done;
+	int from_ut;
+};
+
+volatile struct ut_request *g_req = NULL;
+volatile int g_phase = 0;
+
+/* Post a heap-allocated request to spdk_thread() without waiting for it. */
+static void
+send_request(fs_request_fn fn, void *arg)
+{
+	struct ut_request *req;
+
+	req = calloc(1, sizeof(*req));
+	if (req == NULL) {
+		fprintf(stderr, "send_request: out of memory\n");
+		abort();
+	}
+	req->fn = fn;
+	req->arg = arg;
+	req->done = 0;
+	req->from_ut = 0;
+	g_req = req;
+	spdk_mb();
+	g_phase = !g_phase;
+	spdk_mb();
+}
+
+/* Post a stack-allocated request to spdk_thread() and spin until it is done. */
+static void
+ut_send_request(fs_request_fn fn, void *arg)
+{
+	struct ut_request req;
+
+	req.fn = fn;
+	req.arg = arg;
+	req.done = 0;
+	req.from_ut = 1;
+	g_req = &req;
+	spdk_mb();
+	g_phase = !g_phase;
+	spdk_mb();
+	while (req.done == 0)
+		;
+}
+
+/* Completion callback that records the reported status in g_fserrno. */
+static void
+fs_op_complete(void *ctx, int fserrno)
+{
+	g_fserrno = fserrno;
+}
+
+/* Completion callback that records the filesystem handle and the status. */
+static void
+fs_op_with_handle_complete(void *ctx, struct spdk_filesystem *fs, int fserrno)
+{
+	g_fs = fs;
+	g_fserrno = fserrno;
+}
+
+/* Request handler: initialize the filesystem on g_dev with send_request as its request function. */
+static void
+_fs_init(void *arg)
+{
+	g_fs = NULL;
+	g_fserrno = -1;
+	spdk_fs_init(&g_dev, send_request, fs_op_with_handle_complete, NULL);
+	SPDK_CU_ASSERT_FATAL(g_fs != NULL);
+	CU_ASSERT(g_fserrno == 0);
+}
+
+/* Request handler: unload the filesystem and clear g_fs. */
+static void
+_fs_unload(void *arg)
+{
+	g_fserrno = -1;
+	spdk_fs_unload(g_fs, fs_op_complete, NULL);
+	CU_ASSERT(g_fserrno == 0);
+	g_fs = NULL;
+}
+
+/* Create, truncate, write and delete a file through the synchronous API. */
+static void
+cache_write(void)
+{
+	uint64_t length;
+	int rc;
+	char buf[100];
+	struct spdk_io_channel *channel;
+
+	ut_send_request(_fs_init, NULL);
+
+	spdk_allocate_thread();
+	channel = spdk_fs_alloc_io_channel_sync(g_fs, SPDK_IO_PRIORITY_DEFAULT);
+
+	rc = spdk_fs_open_file(g_fs, channel, "testfile", SPDK_BLOBFS_OPEN_CREATE, &g_file);
+	CU_ASSERT(rc == 0);
+	SPDK_CU_ASSERT_FATAL(g_file != NULL);
+
+	length = (4 * 1024 * 1024);
+	spdk_file_truncate(g_file, channel, length);
+
+	spdk_file_write(g_file, channel, buf, 0, sizeof(buf));
+
+	CU_ASSERT(spdk_file_get_length(g_file) == length);
+
+	spdk_file_truncate(g_file, channel, sizeof(buf));
+
+	spdk_file_close(g_file, channel);
+	rc = spdk_fs_delete_file(g_fs, channel, "testfile");
+	CU_ASSERT(rc == 0);
+
+	rc = spdk_fs_delete_file(g_fs, channel, "testfile");
+	CU_ASSERT(rc == -ENOENT);
+
+	spdk_fs_free_io_channel(channel);
+	spdk_free_thread();
+
+	ut_send_request(_fs_unload, NULL);
+}
+
+/* Append to a file, dropping its cache buffers part-way, and check the length after each write. */
+static void
+cache_append_no_cache(void)
+{
+	int rc;
+	char buf[100];
+	struct spdk_io_channel *channel;
+
+	ut_send_request(_fs_init, NULL);
+
+	spdk_allocate_thread();
+	channel = spdk_fs_alloc_io_channel_sync(g_fs, SPDK_IO_PRIORITY_DEFAULT);
+
+	rc = spdk_fs_open_file(g_fs, channel, "testfile", SPDK_BLOBFS_OPEN_CREATE, &g_file);
+	CU_ASSERT(rc == 0);
+	SPDK_CU_ASSERT_FATAL(g_file != NULL);
+
+	spdk_file_write(g_file, channel, buf, 0 * sizeof(buf), sizeof(buf));
+	CU_ASSERT(spdk_file_get_length(g_file) == 1 * sizeof(buf));
+	spdk_file_write(g_file, channel, buf, 1 * sizeof(buf), sizeof(buf));
+	CU_ASSERT(spdk_file_get_length(g_file) == 2 * sizeof(buf));
+	cache_free_buffers(g_file);
+	spdk_file_write(g_file, channel, buf, 2 * sizeof(buf), sizeof(buf));
+	CU_ASSERT(spdk_file_get_length(g_file) == 3 * sizeof(buf));
+	spdk_file_write(g_file, channel, buf, 3 * sizeof(buf), sizeof(buf));
+	CU_ASSERT(spdk_file_get_length(g_file) == 4 * sizeof(buf));
+	spdk_file_write(g_file, channel, buf, 4 * sizeof(buf), sizeof(buf));
+	CU_ASSERT(spdk_file_get_length(g_file) == 5 * sizeof(buf));
+
+	spdk_file_close(g_file, channel);
+	rc = spdk_fs_delete_file(g_fs, channel, "testfile");
+	CU_ASSERT(rc == 0);
+
+	spdk_fs_free_io_channel(channel);
+	spdk_free_thread();
+
+	ut_send_request(_fs_unload, NULL);
+}
+
+/* Request handler that ends the calling (SPDK) thread. */
+static void
+terminate_spdk_thread(void *arg)
+{
+	spdk_free_thread();
+	pthread_exit(NULL);
+}
+
+/* Thread body: run each request published through g_req whenever g_phase flips. */
+static void *
+spdk_thread(void *arg)
+{
+	struct ut_request *req;
+	int from_ut;
+	int phase = 0;
+	spdk_allocate_thread();
+
+	while (1) {
+		spdk_mb();
+		if (phase != g_phase) {
+			req = (void *)g_req;
+			req->fn(req->arg);
+			/*
+			 * A from_ut request lives on the stack of ut_send_request(), which
+			 * returns once done is set, so read from_ut before publishing done.
+			 */
+			from_ut = req->from_ut;
+			req->done = 1;
+			spdk_mb();
+			if (!from_ut) {
+				free(req);
+			}
+			phase = !phase;
+		}
+	}
+
+	return NULL;
+}
+
+int main(int argc, char **argv)
+{
+	CU_pSuite suite = NULL;
+	pthread_t spdk_tid;
+	unsigned int num_failures;
+
+	if (CU_initialize_registry() != CUE_SUCCESS) {
+		return CU_get_error();
+	}
+
+	suite = CU_add_suite("cache_ut", NULL, NULL);
+	if (suite == NULL) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+
+	if (
+		CU_add_test(suite, "write", cache_write) == NULL ||
+		CU_add_test(suite, "append_no_cache", cache_append_no_cache) == NULL
+	) {
+		CU_cleanup_registry();
+		return CU_get_error();
+	}
+
+	init_dev(&g_dev);
+	sem_init(&g_sem, 0, 0);
+	pthread_create(&spdk_tid, NULL, spdk_thread, NULL);
+	g_dev_buffer = calloc(1, DEV_BUFFER_SIZE);
+	if (g_dev_buffer == NULL) {
+		CU_cleanup_registry();
+		return 1;
+	}
+	CU_basic_set_mode(CU_BRM_VERBOSE);
+	CU_basic_run_tests();
+	num_failures = CU_get_number_of_failures();
+	CU_cleanup_registry();
+	free(g_dev_buffer);
+	send_request(terminate_spdk_thread, NULL);
+	pthread_join(spdk_tid, NULL);
+	return num_failures;
+}
diff --git a/test/lib/blobfs/fuse/.gitignore b/test/lib/blobfs/fuse/.gitignore
new file mode 100644
index 000000000..a517c488f
--- /dev/null
+++ b/test/lib/blobfs/fuse/.gitignore
@@ -0,0 +1 @@
+fuse
diff --git a/test/lib/blobfs/fuse/Makefile b/test/lib/blobfs/fuse/Makefile
new file mode 100644
index 000000000..60d593303
--- /dev/null
+++ b/test/lib/blobfs/fuse/Makefile
@@ -0,0 +1,60 @@
+#
+#  BSD LICENSE
+#
+#  Copyright (c) Intel Corporation.
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions
+#  are met:
+#
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in
+#      the documentation and/or other materials provided with the
+#      distribution.
+#    * Neither the name of Intel Corporation nor the names of its
+#      contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+include $(SPDK_ROOT_DIR)/mk/spdk.app.mk
+include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk
+
+APP = fuse
+
+C_SRCS := fuse.c
+CFLAGS += $(DPDK_INC)
+
+SPDK_LIB_LIST = blobfs blob bdev blob_bdev copy event util conf trace \
+	log jsonrpc json rpc
+
+LIBS += $(COPY_MODULES_LINKER_ARGS) $(BLOCKDEV_MODULES_LINKER_ARGS)
+LIBS += $(SPDK_LIB_LINKER_ARGS) $(ENV_LINKER_ARGS)
+LIBS += -L/usr/local/lib -lfuse3
+LIBS += -ldl
+
+all : $(APP)
+
+$(APP) : $(OBJS) $(SPDK_LIB_FILES)
+	$(LINK_C)
+
+clean :
+	$(CLEAN_C) $(APP)
+
+include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk
diff --git a/test/lib/blobfs/fuse/fuse.c b/test/lib/blobfs/fuse/fuse.c
new file mode 100644
index 000000000..a7d16ca70
--- /dev/null
+++ b/test/lib/blobfs/fuse/fuse.c
@@ -0,0 +1,421 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#define FUSE_USE_VERSION 30
+
+#include "fuse3/fuse.h"
+#include "fuse3/fuse_lowlevel.h"
+
+#include <pthread.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#include "spdk/blobfs.h"
+#include "spdk/bdev.h"
+#include "spdk/event.h"
+#include "spdk/io_channel.h"
+#include "spdk/blob_bdev.h"
+#include "spdk/log.h"
+
+struct fuse *g_fuse;
+char *g_bdev_name;
+char *g_mountpoint;
+pthread_t g_fuse_thread;
+
+struct spdk_bs_dev *g_bs_dev;
+struct spdk_filesystem *g_fs;
+struct spdk_io_channel *g_channel;
+struct spdk_file *g_file;
+int g_fserrno;
+int g_fuse_argc = 0;
+char **g_fuse_argv = NULL;
+
+/* Event trampoline: arg1 carries the fs_request_fn to run and arg2 its argument. */
+static void
+__call_fn(void *arg1, void *arg2)
+{
+	fs_request_fn fn;
+
+	fn = (fs_request_fn)arg1;
+	fn(arg2);
+}
+
+/*
+ * Request dispatcher handed to spdk_fs_load(): wraps fn/arg in an event allocated
+ * for core 0, where __call_fn invokes it.
+ */
+static void
+__send_request(fs_request_fn fn, void *arg)
+{
+	struct spdk_event *event;
+
+	event = spdk_event_allocate(0, __call_fn, (void *)fn, arg);
+	spdk_event_call(event);
+}
+
+/*
+ * getattr handler.  "/" is reported as a directory; any other path is looked up by
+ * name (leading '/' stripped) and reported as a regular file of the stat'ed size.
+ */
+static int
+spdk_fuse_getattr(const char *path, struct stat *stbuf, struct fuse_file_info *fi)
+{
+	struct spdk_file_stat stat;
+	int rc;
+
+	if (!strcmp(path, "/")) {
+		stbuf->st_mode = S_IFDIR | 0755;
+		stbuf->st_nlink = 2;
+		return 0;
+	}
+
+	rc = spdk_fs_file_stat(g_fs, g_channel, &path[1], &stat);
+	if (rc == 0) {
+		stbuf->st_mode = S_IFREG | 0644;
+		stbuf->st_nlink = 1;
+		stbuf->st_size = stat.size;
+	}
+
+	return rc;
+}
+
+/* readdir handler: emits "." and ".." followed by the name of every file in g_fs. */
+static int
+spdk_fuse_readdir(const char *path, void *buf, fuse_fill_dir_t filler,
+		  off_t offset, struct fuse_file_info *fi,
+		  enum fuse_readdir_flags flags)
+{
+	struct spdk_file *file;
+	const char *filename;
+	spdk_fs_iter iter;
+
+	filler(buf, ".", NULL, 0, 0);
+	filler(buf, "..", NULL, 0, 0);
+
+	iter = spdk_fs_iter_first(g_fs);
+	while (iter != NULL) {
+		file = spdk_fs_iter_get_file(iter);
+		iter = spdk_fs_iter_next(iter);
+		filename = spdk_file_get_name(file);
+		filler(buf, filename, NULL, 0, 0);
+	}
+
+	return 0;
+}
+
+/* mknod handler: creates the named file; mode and rdev are ignored. */
+static int
+spdk_fuse_mknod(const char *path, mode_t mode, dev_t rdev)
+{
+	return spdk_fs_create_file(g_fs, g_channel, &path[1]);
+}
+
+/* unlink handler: deletes the named file. */
+static int
+spdk_fuse_unlink(const char *path)
+{
+	return spdk_fs_delete_file(g_fs, g_channel, &path[1]);
+}
+
+/*
+ * truncate handler: opens the file by name, resizes it and closes it again.  An open
+ * failure is returned as-is.  NOTE(review): assumes blobfs reports negative errno - confirm.
+ */
+static int
+spdk_fuse_truncate(const char *path, off_t size, struct fuse_file_info *fi)
+{
+	struct spdk_file *file;
+	int rc;
+
+	rc = spdk_fs_open_file(g_fs, g_channel, &path[1], 0, &file);
+	if (rc != 0) {
+		return rc;
+	}
+
+	spdk_file_truncate(file, g_channel, size);
+	spdk_file_close(file, g_channel);
+
+	return 0;
+}
+
+/* utimens handler: the requested timestamps are accepted and ignored. */
+static int
+spdk_fuse_utimens(const char *path, const struct timespec tv[2], struct fuse_file_info *fi)
+{
+	return 0;
+}
+
+/*
+ * open handler: opens an existing file and stores its handle in info->fh for the read,
+ * write and release handlers.  An open failure is returned as-is (negative errno assumed).
+ */
+static int
+spdk_fuse_open(const char *path, struct fuse_file_info *info)
+{
+	struct spdk_file *file;
+	int rc;
+
+	rc = spdk_fs_open_file(g_fs, g_channel, &path[1], 0, &file);
+	if (rc != 0) {
+		return rc;
+	}
+
+	info->fh = (uintptr_t)file;
+	return 0;
+}
+
+/* release handler: closes the file handle stored by spdk_fuse_open. */
+static int
+spdk_fuse_release(const char *path, struct fuse_file_info *info)
+{
+	struct spdk_file *file = (struct spdk_file *)info->fh;
+
+	return spdk_file_close(file, g_channel);
+}
+
+/* read handler: reads len bytes at offset through the handle stored in info->fh. */
+static int
+spdk_fuse_read(const char *path, char *buf, size_t len, off_t offset, struct fuse_file_info *info)
+{
+	struct spdk_file *file = (struct spdk_file *)info->fh;
+
+	return spdk_file_read(file, g_channel, buf, offset, len);
+}
+
+/* write handler: FUSE wants the byte count on success, so a zero status becomes len. */
+static int
+spdk_fuse_write(const char *path, const char *buf, size_t len, off_t offset,
+		struct fuse_file_info *info)
+{
+	struct spdk_file *file = (struct spdk_file *)info->fh;
+	int rc;
+
+	rc = spdk_file_write(file, g_channel, (void *)buf, offset, len);
+	if (rc == 0) {
+		return len;
+	} else {
+		return rc;
+	}
+}
+
+/* flush handler: no-op that always reports success. */
+static int
+spdk_fuse_flush(const char *path, struct fuse_file_info *info)
+{
+	return 0;
+}
+
+/* fsync handler: no-op that always reports success. */
+static int
+spdk_fuse_fsync(const char *path, int datasync, struct fuse_file_info *info)
+{
+	return 0;
+}
+
+/* rename handler: forwards both names (leading '/' stripped) to blobfs; flags are ignored. */
+static int
+spdk_fuse_rename(const char *old_path, const char *new_path, unsigned int flags)
+{
+	return spdk_fs_rename_file(g_fs, g_channel, &old_path[1], &new_path[1]);
+}
+
+/* FUSE callback table passed to fuse_new(). */
+static struct fuse_operations spdk_fuse_oper = {
+	.getattr	= spdk_fuse_getattr,
+	.readdir	= spdk_fuse_readdir,
+	.mknod		= spdk_fuse_mknod,
+	.unlink		= spdk_fuse_unlink,
+	.truncate	= spdk_fuse_truncate,
+	.utimens	= spdk_fuse_utimens,
+	.open		= spdk_fuse_open,
+	.release	= spdk_fuse_release,
+	.read		= spdk_fuse_read,
+	.write		= spdk_fuse_write,
+	.flush		= spdk_fuse_flush,
+	.fsync		= spdk_fuse_fsync,
+	.rename		= spdk_fuse_rename,
+};
+
+/*
+ * Looks up and claims the bdev named on the command line and wraps it in a blobstore
+ * device (g_bs_dev).  Any failure terminates the process.
+ */
+static void
+construct_targets(void)
+{
+	struct spdk_bdev *bdev;
+
+	bdev = spdk_bdev_get_by_name(g_bdev_name);
+	if (bdev == NULL) {
+		SPDK_ERRLOG("bdev %s not found\n", g_bdev_name);
+		exit(1);
+	}
+
+	if (!spdk_bdev_claim(bdev, NULL, NULL)) {
+		SPDK_ERRLOG("could not claim bdev %s\n", bdev->name);
+		exit(1);
+	}
+
+	g_bs_dev = spdk_bdev_create_bs_dev(bdev);
+	if (g_bs_dev == NULL) {
+		SPDK_ERRLOG("could not create blobstore device on bdev %s\n", bdev->name);
+		exit(1);
+	}
+
+	printf("Mounting BlobFS on bdev %s\n", bdev->name);
+}
+
+/*
+ * Event handler queued by init_cb for core 1: parses the FUSE command line, mounts the
+ * filesystem and runs fuse_loop() on the calling thread until the loop returns.
+ */
+static void
+start_fuse_fn(void *arg1, void *arg2)
+{
+	struct fuse_args args = FUSE_ARGS_INIT(g_fuse_argc, g_fuse_argv);
+	int rc;
+	struct fuse_cmdline_opts opts = {};
+
+	g_fuse_thread = pthread_self();
+	rc = fuse_parse_cmdline(&args, &opts);
+	if (rc != 0) {
+		spdk_app_stop(-1);
+		fuse_opt_free_args(&args);
+		return;
+	}
+	g_fuse = fuse_new(&args, &spdk_fuse_oper, sizeof(spdk_fuse_oper), NULL);
+	fuse_opt_free_args(&args);
+	if (g_fuse == NULL) {
+		spdk_app_stop(-1);
+		return;
+	}
+
+	rc = fuse_mount(g_fuse, g_mountpoint);
+	if (rc != 0) {
+		fuse_destroy(g_fuse);
+		spdk_app_stop(-1);
+		return;
+	}
+
+	fuse_daemonize(true /* true = run in foreground */);
+
+	fuse_loop(g_fuse);
+
+	fuse_unmount(g_fuse);
+	fuse_destroy(g_fuse);
+}
+
+/*
+ * spdk_fs_load() completion: on success records the filesystem, allocates the I/O
+ * channel shared by all handlers and queues start_fuse_fn; on failure stops the app.
+ */
+static void
+init_cb(void *ctx, struct spdk_filesystem *fs, int fserrno)
+{
+	struct spdk_event *event;
+
+	if (fserrno != 0 || fs == NULL) {
+		SPDK_ERRLOG("could not load blobfs (%d)\n", fserrno);
+		spdk_app_stop(-1);
+		return;
+	}
+
+	g_fs = fs;
+	g_channel = spdk_fs_alloc_io_channel_sync(g_fs, SPDK_IO_PRIORITY_DEFAULT);
+	event = spdk_event_allocate(1, start_fuse_fn, NULL, NULL);
+	spdk_event_call(event);
+}
+
+/* Application start routine: sets up the blobstore device and starts loading blobfs. */
+static void
+spdk_fuse_run(void *arg1, void *arg2)
+{
+	construct_targets();
+	spdk_fs_load(g_bs_dev, __send_request, init_cb, NULL);
+}
+
+/*
+ * spdk_fs_unload() completion: asks the FUSE session to exit and sends SIGINT to the
+ * FUSE thread (presumably to interrupt fuse_loop()), then frees the channel and stops.
+ */
+static void
+shutdown_cb(void *ctx, int fserrno)
+{
+	fuse_session_exit(fuse_get_session(g_fuse));
+	pthread_kill(g_fuse_thread, SIGINT);
+	spdk_fs_free_io_channel(g_channel);
+	spdk_app_stop(0);
+}
+
+/* Installed as opts.shutdown_cb: starts unloading the filesystem. */
+static void
+spdk_fuse_shutdown(void)
+{
+	spdk_fs_unload(g_fs, shutdown_cb, NULL);
+}
+
+/*
+ * usage: fuse <conffile> <bdev name> <mountpoint>
+ * Everything from argv[2] on is also kept as the argument vector for fuse_parse_cmdline().
+ */
+int main(int argc, char **argv)
+{
+	struct spdk_app_opts opts = {};
+
+	if (argc < 4) {
+		fprintf(stderr, "usage: %s <conffile> <bdev name> <mountpoint>\n", argv[0]);
+		exit(1);
+	}
+
+	spdk_app_opts_init(&opts);
+	opts.name = "spdk_fuse";
+	opts.config_file = argv[1];
+	opts.reactor_mask = "0x3";
+	opts.dpdk_mem_size = 6144;
+	opts.shutdown_cb = spdk_fuse_shutdown;
+	spdk_app_init(&opts);
+
+	g_bdev_name = argv[2];
+	g_mountpoint = argv[3];
+	g_fuse_argc = argc - 2;
+	g_fuse_argv = &argv[2];
+
+	spdk_app_start(spdk_fuse_run, NULL, NULL);
+	spdk_app_fini();
+
+	return 0;
+}
diff --git a/test/lib/blobfs/mkfs/.gitignore b/test/lib/blobfs/mkfs/.gitignore
new file mode 100644
index 000000000..54e292c61
--- /dev/null
+++ b/test/lib/blobfs/mkfs/.gitignore
@@ -0,0 +1 @@
+mkfs
diff --git a/test/lib/blobfs/mkfs/Makefile b/test/lib/blobfs/mkfs/Makefile
new file mode 100644
index 000000000..0769b59fc
--- /dev/null
+++ b/test/lib/blobfs/mkfs/Makefile
@@ -0,0 +1,58 @@
+#
+#  BSD LICENSE
+#
+#  Copyright (c) Intel Corporation.
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions
+#  are met:
+#
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in
+#      the documentation and/or other materials provided with the
+#      distribution.
+#    * Neither the name of Intel Corporation nor the names of its
+#      contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+include $(SPDK_ROOT_DIR)/mk/spdk.app.mk
+include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk
+
+APP = mkfs
+
+C_SRCS := mkfs.c
+CFLAGS += $(DPDK_INC)
+
+SPDK_LIB_LIST = blobfs blob bdev blob_bdev copy event util conf trace \
+	log jsonrpc json rpc
+
+LIBS += $(COPY_MODULES_LINKER_ARGS) $(BLOCKDEV_MODULES_LINKER_ARGS)
+LIBS += $(SPDK_LIB_LINKER_ARGS) $(ENV_LINKER_ARGS)
+
+all : $(APP)
+
+$(APP) : $(OBJS) $(SPDK_LIB_FILES)
+	$(LINK_C)
+
+clean :
+	$(CLEAN_C) $(APP)
+
+include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk
diff --git a/test/lib/blobfs/mkfs/mkfs.c b/test/lib/blobfs/mkfs/mkfs.c
new file mode 100644
index 000000000..13e15dd74
--- /dev/null
+++ b/test/lib/blobfs/mkfs/mkfs.c
@@ -0,0 +1,144 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "spdk/blobfs.h"
+#include "spdk/bdev.h"
+#include "spdk/event.h"
+#include "spdk/blob_bdev.h"
+#include "spdk/log.h"
+
+struct spdk_bs_dev *g_bs_dev;
+const char *g_bdev_name;
+
+/* spdk_fs_unload() completion: the filesystem is written out, so stop the app. */
+static void
+stop_cb(void *ctx, int fserrno)
+{
+	spdk_app_stop(0);
+}
+
+/*
+ * Event handler queued by init_cb: finishes the "Initializing filesystem..." progress
+ * line and unloads the freshly created filesystem passed in arg1.
+ */
+static void
+shutdown_cb(void *arg1, void *arg2)
+{
+	struct spdk_filesystem *fs = arg1;
+
+	printf("done.\n");
+	spdk_fs_unload(fs, stop_cb, NULL);
+}
+
+/*
+ * spdk_fs_init() completion: on success queues shutdown_cb on core 0 to unload the new
+ * filesystem; on failure reports the error and stops the app with a non-zero status.
+ */
+static void
+init_cb(void *ctx, struct spdk_filesystem *fs, int fserrno)
+{
+	struct spdk_event *event;
+
+	if (fserrno != 0 || fs == NULL) {
+		SPDK_ERRLOG("could not initialize filesystem on bdev %s (%d)\n", g_bdev_name, fserrno);
+		spdk_app_stop(-1);
+		return;
+	}
+
+	event = spdk_event_allocate(0, shutdown_cb, fs, NULL);
+	spdk_event_call(event);
+}
+
+/*
+ * Application start routine: looks up and claims the bdev named on the command line,
+ * wraps it in a blobstore device and starts creating a filesystem on it.
+ */
+static void
+spdk_mkfs_run(void *arg1, void *arg2)
+{
+	struct spdk_bdev *bdev;
+
+	bdev = spdk_bdev_get_by_name(g_bdev_name);
+
+	if (bdev == NULL) {
+		SPDK_ERRLOG("bdev %s not found\n", g_bdev_name);
+		spdk_app_stop(-1);
+		return;
+	}
+
+	if (!spdk_bdev_claim(bdev, NULL, NULL)) {
+		SPDK_ERRLOG("could not claim bdev %s\n", g_bdev_name);
+		spdk_app_stop(-1);
+		return;
+	}
+
+	printf("Initializing filesystem on bdev %s...", g_bdev_name);
+	fflush(stdout);
+	g_bs_dev = spdk_bdev_create_bs_dev(bdev);
+	if (g_bs_dev == NULL) {
+		SPDK_ERRLOG("could not create blobstore device on bdev %s\n", g_bdev_name);
+		spdk_app_stop(-1);
+		return;
+	}
+
+	spdk_fs_init(g_bs_dev, NULL, init_cb, NULL);
+}
+
+/* usage: mkfs <conffile> <bdev name> */
+int main(int argc, char **argv)
+{
+	struct spdk_app_opts opts = {};
+
+	if (argc < 3) {
+		SPDK_ERRLOG("usage: %s <conffile> <bdev name>\n", argv[0]);
+		exit(1);
+	}
+
+	spdk_app_opts_init(&opts);
+	opts.name = "spdk_mkfs";
+	opts.config_file = argv[1];
+	opts.reactor_mask = "0x3";
+	opts.dpdk_mem_size = 1024;
+	opts.shutdown_cb = NULL;
+	spdk_app_init(&opts);
+
+	spdk_fs_set_cache_size(512);
+
+	g_bdev_name = argv[2];
+	spdk_app_start(spdk_mkfs_run, NULL, NULL);
+	spdk_app_fini();
+
+	return 0;
+}
diff --git a/unittest.sh b/unittest.sh
index 6c01d08ed..c6d13876f 100755
--- a/unittest.sh
+++ b/unittest.sh
@@ -7,6 +7,9 @@ set -xe
 
 $valgrind test/lib/blob/blob_ut/blob_ut
 
+$valgrind test/lib/blobfs/blobfs_ut/blobfs_ut
+$valgrind test/lib/blobfs/cache_ut/cache_ut
+
 $valgrind test/lib/nvme/unit/nvme_c/nvme_ut
 $valgrind test/lib/nvme/unit/nvme_ctrlr_c/nvme_ctrlr_ut
 $valgrind test/lib/nvme/unit/nvme_ctrlr_cmd_c/nvme_ctrlr_cmd_ut