From a6014eb2adf0c95816b23ef94a911005fa047511 Mon Sep 17 00:00:00 2001
From: Jim Harris
Date: Wed, 12 Jul 2017 15:49:24 -0700
Subject: [PATCH] blobfs: process one set_xattr at a time

During RocksDB testing with MySQL, we found cases where blobfs would
try to update and sync the length xattr on the underlying blob while
an existing update was already in progress.  This was primarily driven
by RocksDB performing appends and syncs on the log file from multiple
writer threads.

The simplest way to fix this is to just process one sync_request at a
time.  There could be a tiny bit of inefficiency here if multiple
threads are appending and syncing a file in parallel - we can look at
some additional optimizations if we find a case where that is
noticeable.

Signed-off-by: Jim Harris
Reported-by: Changpeng Liu
Tested-by: Changpeng Liu
Change-Id: I7ab7814494d365bae8716efd0b828337286cc7b7
Reviewed-on: https://review.gerrithub.io/369490
Tested-by: SPDK Automated Test System
Reviewed-by: Ben Walker
Reviewed-by: Changpeng Liu
---
 lib/blobfs/blobfs.c | 74 +++++++++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 30 deletions(-)

diff --git a/lib/blobfs/blobfs.c b/lib/blobfs/blobfs.c
index 36538ef72..381fa5d54 100644
--- a/lib/blobfs/blobfs.c
+++ b/lib/blobfs/blobfs.c
@@ -166,6 +166,7 @@ struct spdk_fs_cb_args {
 		struct {
 			uint64_t offset;
 			TAILQ_ENTRY(spdk_fs_request) tailq;
+			bool xattr_in_progress;
 		} sync;
 		struct {
 			uint32_t num_clusters;
@@ -1647,6 +1648,8 @@ __wake_caller(struct spdk_fs_cb_args *args)
 	sem_post(args->sem);
 }
 
+static void __check_sync_reqs(struct spdk_file *file);
+
 static void
 __file_cache_finish_sync(struct spdk_file *file)
 {
@@ -1654,19 +1657,18 @@ __file_cache_finish_sync(struct spdk_file *file)
 	struct spdk_fs_cb_args *sync_args;
 
 	pthread_spin_lock(&file->lock);
-	while (!TAILQ_EMPTY(&file->sync_requests)) {
-		sync_req = TAILQ_FIRST(&file->sync_requests);
-		sync_args = &sync_req->args;
-		if (sync_args->op.sync.offset > file->length_flushed) {
-			break;
-		}
-		BLOBFS_TRACE(file, "sync done offset=%jx\n", sync_args->op.sync.offset);
-		TAILQ_REMOVE(&file->sync_requests, sync_req, args.op.sync.tailq);
-		pthread_spin_unlock(&file->lock);
-		sync_args->fn.file_op(sync_args->arg, 0);
-		pthread_spin_lock(&file->lock);
-		free_fs_request(sync_req);
-	}
+	sync_req = TAILQ_FIRST(&file->sync_requests);
+	sync_args = &sync_req->args;
+	assert(sync_args->op.sync.offset <= file->length_flushed);
+	BLOBFS_TRACE(file, "sync done offset=%jx\n", sync_args->op.sync.offset);
+	TAILQ_REMOVE(&file->sync_requests, sync_req, args.op.sync.tailq);
+	pthread_spin_unlock(&file->lock);
+
+	sync_args->fn.file_op(sync_args->arg, 0);
+	__check_sync_reqs(file);
+
+	pthread_spin_lock(&file->lock);
+	free_fs_request(sync_req);
 	pthread_spin_unlock(&file->lock);
 }
 
@@ -1692,11 +1694,36 @@ __free_args(struct spdk_fs_cb_args *args)
 	}
 }
 
+static void
+__check_sync_reqs(struct spdk_file *file)
+{
+	struct spdk_fs_request *sync_req;
+
+	pthread_spin_lock(&file->lock);
+
+	TAILQ_FOREACH(sync_req, &file->sync_requests, args.op.sync.tailq) {
+		if (sync_req->args.op.sync.offset <= file->length_flushed) {
+			break;
+		}
+	}
+
+	if (sync_req != NULL && !sync_req->args.op.sync.xattr_in_progress) {
+		BLOBFS_TRACE(file, "set xattr length 0x%jx\n", file->length_flushed);
+		sync_req->args.op.sync.xattr_in_progress = true;
+		spdk_blob_md_set_xattr(file->blob, "length", &file->length_flushed,
+				       sizeof(file->length_flushed));
+
+		pthread_spin_unlock(&file->lock);
+		spdk_bs_md_sync_blob(file->blob, __file_cache_finish_sync_bs_cb, file);
+	} else {
+		pthread_spin_unlock(&file->lock);
+	}
+}
+
 static void
 __file_flush_done(void *arg, int bserrno)
 {
 	struct spdk_fs_cb_args *args = arg;
-	struct spdk_fs_request *sync_req;
 	struct spdk_file *file = args->file;
 	struct cache_buffer *next = args->op.flush.cache_buffer;
 
@@ -1714,12 +1741,6 @@ __file_flush_done(void *arg, int bserrno)
 		next = spdk_tree_find_buffer(file->tree, file->length_flushed);
 	}
 
-	TAILQ_FOREACH_REVERSE(sync_req, &file->sync_requests, sync_requests_head, args.op.sync.tailq) {
-		if (sync_req->args.op.sync.offset <= file->length_flushed) {
-			break;
-		}
-	}
-
 	/*
 	 * Assert that there is no cached data that extends past the end of the underlying
 	 * blob.
@@ -1727,17 +1748,9 @@ __file_flush_done(void *arg, int bserrno)
 	assert(next == NULL || next->offset < __file_get_blob_size(file) ||
 	       next->bytes_filled == 0);
 
-	if (sync_req != NULL) {
-		BLOBFS_TRACE(file, "set xattr length 0x%jx\n", file->length_flushed);
-		spdk_blob_md_set_xattr(file->blob, "length", &file->length_flushed,
-				       sizeof(file->length_flushed));
+	pthread_spin_unlock(&file->lock);
 
-		pthread_spin_unlock(&file->lock);
-		spdk_bs_md_sync_blob(file->blob, __file_cache_finish_sync_bs_cb, file);
-	} else {
-		pthread_spin_unlock(&file->lock);
-		__file_cache_finish_sync(file);
-	}
+	__check_sync_reqs(file);
 
 	__file_flush(args);
 }
@@ -2153,6 +2166,7 @@ _file_sync(struct spdk_file *file, struct spdk_fs_channel *channel,
 	sync_args->fn.file_op = cb_fn;
 	sync_args->arg = cb_arg;
 	sync_args->op.sync.offset = file->append_pos;
+	sync_args->op.sync.xattr_in_progress = false;
 	TAILQ_INSERT_TAIL(&file->sync_requests, sync_req, args.op.sync.tailq);
 	pthread_spin_unlock(&file->lock);
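
A note for readers on the pattern above: __check_sync_reqs() walks the
pending sync list for the first request whose offset is already covered
by length_flushed, and starts an xattr update for it only if one is not
already in flight; the completion path then pops exactly one request and
re-runs the check.  Below is a minimal standalone sketch of that
claim-one-at-a-time pattern.  The names here (sync_req, file_state,
check_sync_reqs) are hypothetical stand-ins, not the SPDK API; the spots
where the real code calls spdk_blob_md_set_xattr() and
spdk_bs_md_sync_blob() are marked in comments.

/*
 * Sketch of the one-outstanding-xattr-update pattern, with a plain
 * singly linked list standing in for the TAILQ used by blobfs.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct sync_req {
	uint64_t offset;		/* file offset this sync must cover */
	bool xattr_in_progress;		/* true while its update is outstanding */
	struct sync_req *next;
};

struct file_state {
	pthread_spinlock_t lock;
	uint64_t length_flushed;	/* highest offset flushed so far */
	struct sync_req *head;		/* FIFO of pending sync requests */
};

/*
 * Find the first pending request already covered by length_flushed and,
 * if no xattr update is in flight for it, claim it.  Returns the claimed
 * request (the caller would then do the set_xattr + blob sync) or NULL.
 */
static struct sync_req *
check_sync_reqs(struct file_state *f)
{
	struct sync_req *req;

	pthread_spin_lock(&f->lock);
	for (req = f->head; req != NULL; req = req->next) {
		if (req->offset <= f->length_flushed) {
			break;
		}
	}
	if (req != NULL && !req->xattr_in_progress) {
		/*
		 * Claim it while still holding the lock, so a concurrent
		 * flush completion cannot start a second update.  This is
		 * where the real code issues spdk_blob_md_set_xattr() and
		 * spdk_bs_md_sync_blob().
		 */
		req->xattr_in_progress = true;
		pthread_spin_unlock(&f->lock);
		return req;
	}
	pthread_spin_unlock(&f->lock);
	return NULL;
}

int
main(void)
{
	struct sync_req r1 = { .offset = 100, .next = NULL };
	struct file_state f = { .length_flushed = 512, .head = &r1 };

	pthread_spin_init(&f.lock, PTHREAD_PROCESS_PRIVATE);

	/* The first call claims the request; a second call (e.g. another
	 * flush completing) sees xattr_in_progress set and backs off. */
	printf("first:  %p\n", (void *)check_sync_reqs(&f));
	printf("second: %p\n", (void *)check_sync_reqs(&f));

	pthread_spin_destroy(&f.lock);
	return 0;
}

In the patch itself, the second half of this handshake lives in
__file_cache_finish_sync(): it removes exactly one request, invokes its
callback, and calls __check_sync_reqs() again, which is what serializes
the length xattr updates.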