Spdk/module/bdev/ocf/vbdev_ocf.c

1780 lines
42 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (C) 2018 Intel Corporation.
* All rights reserved.
*/
#include <ocf/ocf.h>
#include <ocf/ocf_types.h>
#include <ocf/ocf_mngt.h>
#include "ctx.h"
#include "data.h"
#include "volume.h"
#include "utils.h"
#include "vbdev_ocf.h"
#include "spdk/bdev_module.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/log.h"
#include "spdk/cpuset.h"
static struct spdk_bdev_module ocf_if;
static TAILQ_HEAD(, vbdev_ocf) g_ocf_vbdev_head
= TAILQ_HEAD_INITIALIZER(g_ocf_vbdev_head);
static TAILQ_HEAD(, examining_bdev) g_ocf_examining_bdevs_head
= TAILQ_HEAD_INITIALIZER(g_ocf_examining_bdevs_head);
bool g_fini_started = false;
/* Structure for keeping list of bdevs that are claimed but not used yet */
struct examining_bdev {
struct spdk_bdev *bdev;
TAILQ_ENTRY(examining_bdev) tailq;
};
/* Add bdev to list of claimed */
static void
examine_start(struct spdk_bdev *bdev)
{
struct examining_bdev *entry = malloc(sizeof(*entry));
assert(entry);
entry->bdev = bdev;
TAILQ_INSERT_TAIL(&g_ocf_examining_bdevs_head, entry, tailq);
}
/* Find bdev on list of claimed bdevs, then remove it,
* if it was the last one on list then report examine done */
static void
examine_done(int status, struct vbdev_ocf *vbdev, void *cb_arg)
{
struct spdk_bdev *bdev = cb_arg;
struct examining_bdev *entry, *safe, *found = NULL;
TAILQ_FOREACH_SAFE(entry, &g_ocf_examining_bdevs_head, tailq, safe) {
if (entry->bdev == bdev) {
if (found) {
goto remove;
} else {
found = entry;
}
}
}
assert(found);
spdk_bdev_module_examine_done(&ocf_if);
remove:
TAILQ_REMOVE(&g_ocf_examining_bdevs_head, found, tailq);
free(found);
}
/* Free allocated strings and structure itself
* Used at shutdown only */
static void
free_vbdev(struct vbdev_ocf *vbdev)
{
if (!vbdev) {
return;
}
free(vbdev->name);
free(vbdev->cache.name);
free(vbdev->core.name);
free(vbdev);
}
/* Get existing cache base
* that is attached to other vbdev */
static struct vbdev_ocf_base *
get_other_cache_base(struct vbdev_ocf_base *base)
{
struct vbdev_ocf *vbdev;
TAILQ_FOREACH(vbdev, &g_ocf_vbdev_head, tailq) {
if (&vbdev->cache == base || !vbdev->cache.attached) {
continue;
}
if (!strcmp(vbdev->cache.name, base->name)) {
return &vbdev->cache;
}
}
return NULL;
}
static bool
is_ocf_cache_running(struct vbdev_ocf *vbdev)
{
if (vbdev->cache.attached && vbdev->ocf_cache) {
return ocf_cache_is_running(vbdev->ocf_cache);
}
return false;
}
static bool
is_ocf_cache_initializing(struct vbdev_ocf *vbdev)
{
if (vbdev->cache.attached && vbdev->ocf_cache) {
return ocf_cache_is_initializing(vbdev->ocf_cache);
}
return false;
}
/* Get existing OCF cache instance
* that is started by other vbdev */
static ocf_cache_t
get_other_cache_instance(struct vbdev_ocf *vbdev)
{
struct vbdev_ocf *cmp;
TAILQ_FOREACH(cmp, &g_ocf_vbdev_head, tailq) {
if (cmp->state.doing_finish || cmp == vbdev) {
continue;
}
if (strcmp(cmp->cache.name, vbdev->cache.name)) {
continue;
}
if (is_ocf_cache_running(cmp) || is_ocf_cache_initializing(cmp)) {
return cmp->ocf_cache;
}
}
return NULL;
}
static void
_remove_base_bdev(void *ctx)
{
struct spdk_bdev_desc *desc = ctx;
spdk_bdev_close(desc);
}
/* Close and unclaim base bdev */
static void
remove_base_bdev(struct vbdev_ocf_base *base)
{
if (base->attached) {
if (base->management_channel) {
spdk_put_io_channel(base->management_channel);
}
spdk_bdev_module_release_bdev(base->bdev);
/* Close the underlying bdev on its same opened thread. */
if (base->thread && base->thread != spdk_get_thread()) {
spdk_thread_send_msg(base->thread, _remove_base_bdev, base->desc);
} else {
spdk_bdev_close(base->desc);
}
base->attached = false;
}
}
/* Finish unregister operation */
static void
unregister_finish(struct vbdev_ocf *vbdev)
{
spdk_bdev_destruct_done(&vbdev->exp_bdev, vbdev->state.stop_status);
if (vbdev->ocf_cache) {
ocf_mngt_cache_put(vbdev->ocf_cache);
}
if (vbdev->cache_ctx) {
vbdev_ocf_cache_ctx_put(vbdev->cache_ctx);
}
vbdev_ocf_mngt_continue(vbdev, 0);
}
static void
close_core_bdev(struct vbdev_ocf *vbdev)
{
remove_base_bdev(&vbdev->core);
vbdev_ocf_mngt_continue(vbdev, 0);
}
static void
remove_core_cmpl(void *priv, int error)
{
struct vbdev_ocf *vbdev = priv;
ocf_mngt_cache_unlock(vbdev->ocf_cache);
vbdev_ocf_mngt_continue(vbdev, error);
}
/* Try to lock cache, then remove core */
static void
remove_core_cache_lock_cmpl(ocf_cache_t cache, void *priv, int error)
{
struct vbdev_ocf *vbdev = (struct vbdev_ocf *)priv;
if (error) {
SPDK_ERRLOG("Error %d, can not lock cache instance %s\n",
error, vbdev->name);
vbdev_ocf_mngt_continue(vbdev, error);
return;
}
ocf_mngt_cache_remove_core(vbdev->ocf_core, remove_core_cmpl, vbdev);
}
/* Detach core base */
static void
detach_core(struct vbdev_ocf *vbdev)
{
if (is_ocf_cache_running(vbdev)) {
ocf_mngt_cache_lock(vbdev->ocf_cache, remove_core_cache_lock_cmpl, vbdev);
} else {
vbdev_ocf_mngt_continue(vbdev, 0);
}
}
static void
close_cache_bdev(struct vbdev_ocf *vbdev)
{
remove_base_bdev(&vbdev->cache);
vbdev_ocf_mngt_continue(vbdev, 0);
}
/* Detach cache base */
static void
detach_cache(struct vbdev_ocf *vbdev)
{
vbdev->state.stop_status = vbdev->mngt_ctx.status;
/* If some other vbdev references this cache bdev,
* we detach this only by changing the flag, without actual close */
if (get_other_cache_base(&vbdev->cache)) {
vbdev->cache.attached = false;
}
vbdev_ocf_mngt_continue(vbdev, 0);
}
static void
stop_vbdev_cmpl(ocf_cache_t cache, void *priv, int error)
{
struct vbdev_ocf *vbdev = priv;
vbdev_ocf_queue_put(vbdev->cache_ctx->mngt_queue);
ocf_mngt_cache_unlock(cache);
vbdev_ocf_mngt_continue(vbdev, error);
}
/* Try to lock cache, then stop it */
static void
stop_vbdev_cache_lock_cmpl(ocf_cache_t cache, void *priv, int error)
{
struct vbdev_ocf *vbdev = (struct vbdev_ocf *)priv;
if (error) {
SPDK_ERRLOG("Error %d, can not lock cache instance %s\n",
error, vbdev->name);
vbdev_ocf_mngt_continue(vbdev, error);
return;
}
ocf_mngt_cache_stop(vbdev->ocf_cache, stop_vbdev_cmpl, vbdev);
}
/* Stop OCF cache object
* vbdev_ocf is not operational after this */
static void
stop_vbdev(struct vbdev_ocf *vbdev)
{
if (!is_ocf_cache_running(vbdev)) {
vbdev_ocf_mngt_continue(vbdev, 0);
return;
}
if (!g_fini_started && get_other_cache_instance(vbdev)) {
SPDK_NOTICELOG("Not stopping cache instance '%s'"
" because it is referenced by other OCF bdev\n",
vbdev->cache.name);
vbdev_ocf_mngt_continue(vbdev, 0);
return;
}
ocf_mngt_cache_lock(vbdev->ocf_cache, stop_vbdev_cache_lock_cmpl, vbdev);
}
static void
flush_vbdev_cmpl(ocf_cache_t cache, void *priv, int error)
{
struct vbdev_ocf *vbdev = priv;
ocf_mngt_cache_unlock(cache);
vbdev_ocf_mngt_continue(vbdev, error);
}
static void
flush_vbdev_cache_lock_cmpl(ocf_cache_t cache, void *priv, int error)
{
struct vbdev_ocf *vbdev = (struct vbdev_ocf *)priv;
if (error) {
SPDK_ERRLOG("Error %d, can not lock cache instance %s\n",
error, vbdev->name);
vbdev_ocf_mngt_continue(vbdev, error);
return;
}
ocf_mngt_cache_flush(vbdev->ocf_cache, flush_vbdev_cmpl, vbdev);
}
static void
flush_vbdev(struct vbdev_ocf *vbdev)
{
if (!is_ocf_cache_running(vbdev)) {
vbdev_ocf_mngt_continue(vbdev, -EINVAL);
return;
}
ocf_mngt_cache_lock(vbdev->ocf_cache, flush_vbdev_cache_lock_cmpl, vbdev);
}
/* Procedures called during dirty unregister */
vbdev_ocf_mngt_fn unregister_path_dirty[] = {
flush_vbdev,
stop_vbdev,
detach_cache,
close_cache_bdev,
detach_core,
close_core_bdev,
unregister_finish,
NULL
};
/* Procedures called during clean unregister */
vbdev_ocf_mngt_fn unregister_path_clean[] = {
flush_vbdev,
detach_core,
close_core_bdev,
stop_vbdev,
detach_cache,
close_cache_bdev,
unregister_finish,
NULL
};
/* Start asynchronous management operation using unregister_path */
static void
unregister_cb(void *opaque)
{
struct vbdev_ocf *vbdev = opaque;
vbdev_ocf_mngt_fn *unregister_path;
int rc;
unregister_path = vbdev->state.doing_clean_delete ?
unregister_path_clean : unregister_path_dirty;
rc = vbdev_ocf_mngt_start(vbdev, unregister_path, NULL, NULL);
if (rc) {
SPDK_ERRLOG("Unable to unregister OCF bdev: %d\n", rc);
spdk_bdev_destruct_done(&vbdev->exp_bdev, rc);
}
}
/* Clean remove case - remove core and then cache, this order
* will remove instance permanently */
static void
_vbdev_ocf_destruct_clean(struct vbdev_ocf *vbdev)
{
if (vbdev->core.attached) {
detach_core(vbdev);
close_core_bdev(vbdev);
}
if (vbdev->cache.attached) {
detach_cache(vbdev);
close_cache_bdev(vbdev);
}
}
/* Dirty shutdown/hot remove case - remove cache and then core, this order
* will allow us to recover this instance in the future */
static void
_vbdev_ocf_destruct_dirty(struct vbdev_ocf *vbdev)
{
if (vbdev->cache.attached) {
detach_cache(vbdev);
close_cache_bdev(vbdev);
}
if (vbdev->core.attached) {
detach_core(vbdev);
close_core_bdev(vbdev);
}
}
/* Unregister io device with callback to unregister_cb
* This function is called during spdk_bdev_unregister */
static int
vbdev_ocf_destruct(void *opaque)
{
struct vbdev_ocf *vbdev = opaque;
if (vbdev->state.doing_finish) {
return -EALREADY;
}
if (vbdev->state.starting && !vbdev->state.started) {
/* Prevent before detach cache/core during register path of
this bdev */
return -EBUSY;
}
vbdev->state.doing_finish = true;
if (vbdev->state.started) {
spdk_io_device_unregister(vbdev, unregister_cb);
/* Return 1 because unregister is delayed */
return 1;
}
if (vbdev->state.doing_clean_delete) {
_vbdev_ocf_destruct_clean(vbdev);
} else {
_vbdev_ocf_destruct_dirty(vbdev);
}
return 0;
}
/* Stop OCF cache and unregister SPDK bdev */
int
vbdev_ocf_delete(struct vbdev_ocf *vbdev, void (*cb)(void *, int), void *cb_arg)
{
int rc = 0;
if (vbdev->state.started) {
spdk_bdev_unregister(&vbdev->exp_bdev, cb, cb_arg);
} else {
rc = vbdev_ocf_destruct(vbdev);
if (rc == 0 && cb) {
cb(cb_arg, 0);
}
}
return rc;
}
/* Remove cores permanently and then stop OCF cache and unregister SPDK bdev */
int
vbdev_ocf_delete_clean(struct vbdev_ocf *vbdev, void (*cb)(void *, int),
void *cb_arg)
{
vbdev->state.doing_clean_delete = true;
return vbdev_ocf_delete(vbdev, cb, cb_arg);
}
/* If vbdev is online, return its object */
struct vbdev_ocf *
vbdev_ocf_get_by_name(const char *name)
{
struct vbdev_ocf *vbdev;
if (name == NULL) {
assert(false);
return NULL;
}
TAILQ_FOREACH(vbdev, &g_ocf_vbdev_head, tailq) {
if (vbdev->name == NULL || vbdev->state.doing_finish) {
continue;
}
if (strcmp(vbdev->name, name) == 0) {
return vbdev;
}
}
return NULL;
}
/* Return matching base if parent vbdev is online */
struct vbdev_ocf_base *
vbdev_ocf_get_base_by_name(const char *name)
{
struct vbdev_ocf *vbdev;
if (name == NULL) {
assert(false);
return NULL;
}
TAILQ_FOREACH(vbdev, &g_ocf_vbdev_head, tailq) {
if (vbdev->state.doing_finish) {
continue;
}
if (vbdev->cache.name && strcmp(vbdev->cache.name, name) == 0) {
return &vbdev->cache;
}
if (vbdev->core.name && strcmp(vbdev->core.name, name) == 0) {
return &vbdev->core;
}
}
return NULL;
}
/* Execute fn for each OCF device that is online or waits for base devices */
void
vbdev_ocf_foreach(vbdev_ocf_foreach_fn fn, void *ctx)
{
struct vbdev_ocf *vbdev;
assert(fn != NULL);
TAILQ_FOREACH(vbdev, &g_ocf_vbdev_head, tailq) {
if (!vbdev->state.doing_finish) {
fn(vbdev, ctx);
}
}
}
/* Called from OCF when SPDK_IO is completed */
static void
vbdev_ocf_io_submit_cb(struct ocf_io *io, int error)
{
struct spdk_bdev_io *bdev_io = io->priv1;
if (error == 0) {
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
} else if (error == -OCF_ERR_NO_MEM) {
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
} else {
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
}
ocf_io_put(io);
}
/* Configure io parameters and send it to OCF */
static int
io_submit_to_ocf(struct spdk_bdev_io *bdev_io, struct ocf_io *io)
{
switch (bdev_io->type) {
case SPDK_BDEV_IO_TYPE_WRITE:
case SPDK_BDEV_IO_TYPE_READ:
ocf_core_submit_io(io);
return 0;
case SPDK_BDEV_IO_TYPE_FLUSH:
ocf_core_submit_flush(io);
return 0;
case SPDK_BDEV_IO_TYPE_UNMAP:
ocf_core_submit_discard(io);
return 0;
case SPDK_BDEV_IO_TYPE_RESET:
case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
default:
SPDK_ERRLOG("Unsupported IO type: %d\n", bdev_io->type);
return -EINVAL;
}
}
/* Submit SPDK-IO to OCF */
static void
io_handle(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
struct vbdev_ocf *vbdev = bdev_io->bdev->ctxt;
struct ocf_io *io = NULL;
struct bdev_ocf_data *data = NULL;
struct vbdev_ocf_qctx *qctx = spdk_io_channel_get_ctx(ch);
uint64_t len = bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;
uint64_t offset = bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen;
int dir, flags = 0;
int err;
switch (bdev_io->type) {
case SPDK_BDEV_IO_TYPE_READ:
dir = OCF_READ;
break;
case SPDK_BDEV_IO_TYPE_WRITE:
dir = OCF_WRITE;
break;
case SPDK_BDEV_IO_TYPE_FLUSH:
dir = OCF_WRITE;
break;
case SPDK_BDEV_IO_TYPE_UNMAP:
dir = OCF_WRITE;
break;
default:
err = -EINVAL;
goto fail;
}
if (bdev_io->type == SPDK_BDEV_IO_TYPE_FLUSH) {
flags = OCF_WRITE_FLUSH;
}
io = ocf_volume_new_io(ocf_core_get_front_volume(vbdev->ocf_core), qctx->queue, offset, len, dir, 0,
flags);
if (!io) {
err = -ENOMEM;
goto fail;
}
data = vbdev_ocf_data_from_spdk_io(bdev_io);
if (!data) {
err = -ENOMEM;
goto fail;
}
err = ocf_io_set_data(io, data, 0);
if (err) {
goto fail;
}
ocf_io_set_cmpl(io, bdev_io, NULL, vbdev_ocf_io_submit_cb);
err = io_submit_to_ocf(bdev_io, io);
if (err) {
goto fail;
}
return;
fail:
if (io) {
ocf_io_put(io);
}
if (err == -ENOMEM) {
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
} else {
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
}
}
bdev: Not assert but pass completion status to spdk_bdev_io_get_buf_cb When the specified buffer size to spdk_bdev_io_get_buf() is greater than the permitted maximum, spdk_bdev_io_get_buf() asserts simply and doesn't call the specified callback function. SPDK SCSI library doesn't allocate read buffer and specifies expected read buffer size, and expects that it is allocated by spdk_bdev_io_get_buf(). Bdev perf tool also doesn't allocate read buffer and specifies expected read buffer size, and expects that it is allocated by spdk_bdev_io_get_buf(). When we support DIF insert and strip in iSCSI target, the read buffer size iSCSI initiator requests and the read buffer size iSCSI target requests will become different. Even after that, iSCSI initiator and iSCSI target will negotiate correctly not to cause buffer overflow in spdk_bdev_io_get_buf(), but if iSCSI initiator ignores the result of negotiation, iSCSI initiator can request read buffer size larger than the permitted maximum, and can cause failure in iSCSI target. This is very flagile and should be avoided. This patch do the following - Add the completion status of spdk_bdev_io_get_buf() to spdk_bdev_io_get_buf_cb(), - spdk_bdev_io_get_buf() calls spdk_bdev_io_get_buf_cb() by setting success to false, and return. - spdk_bdev_io_get_buf_cb() in each bdev module calls assert if success is false. Subsequent patches will process the case that success is false in spdk_bdev_io_get_buf_cb(). Change-Id: I76429a86e18a69aa085a353ac94743296d270b82 Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com> Reviewed-on: https://review.gerrithub.io/c/446045 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Changpeng Liu <changpeng.liu@intel.com> Reviewed-by: Ziye Yang <ziye.yang@intel.com> Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
2019-02-25 00:34:28 +00:00
static void
vbdev_ocf_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
bool success)
{
if (!success) {
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
return;
}
bdev: Not assert but pass completion status to spdk_bdev_io_get_buf_cb When the specified buffer size to spdk_bdev_io_get_buf() is greater than the permitted maximum, spdk_bdev_io_get_buf() asserts simply and doesn't call the specified callback function. SPDK SCSI library doesn't allocate read buffer and specifies expected read buffer size, and expects that it is allocated by spdk_bdev_io_get_buf(). Bdev perf tool also doesn't allocate read buffer and specifies expected read buffer size, and expects that it is allocated by spdk_bdev_io_get_buf(). When we support DIF insert and strip in iSCSI target, the read buffer size iSCSI initiator requests and the read buffer size iSCSI target requests will become different. Even after that, iSCSI initiator and iSCSI target will negotiate correctly not to cause buffer overflow in spdk_bdev_io_get_buf(), but if iSCSI initiator ignores the result of negotiation, iSCSI initiator can request read buffer size larger than the permitted maximum, and can cause failure in iSCSI target. This is very flagile and should be avoided. This patch do the following - Add the completion status of spdk_bdev_io_get_buf() to spdk_bdev_io_get_buf_cb(), - spdk_bdev_io_get_buf() calls spdk_bdev_io_get_buf_cb() by setting success to false, and return. - spdk_bdev_io_get_buf_cb() in each bdev module calls assert if success is false. Subsequent patches will process the case that success is false in spdk_bdev_io_get_buf_cb(). Change-Id: I76429a86e18a69aa085a353ac94743296d270b82 Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com> Reviewed-on: https://review.gerrithub.io/c/446045 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Changpeng Liu <changpeng.liu@intel.com> Reviewed-by: Ziye Yang <ziye.yang@intel.com> Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
2019-02-25 00:34:28 +00:00
io_handle(ch, bdev_io);
}
/* Called from bdev layer when an io to Cache vbdev is submitted */
static void
vbdev_ocf_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
switch (bdev_io->type) {
case SPDK_BDEV_IO_TYPE_READ:
/* User does not have to allocate io vectors for the request,
* so in case they are not allocated, we allocate them here */
bdev: Not assert but pass completion status to spdk_bdev_io_get_buf_cb When the specified buffer size to spdk_bdev_io_get_buf() is greater than the permitted maximum, spdk_bdev_io_get_buf() asserts simply and doesn't call the specified callback function. SPDK SCSI library doesn't allocate read buffer and specifies expected read buffer size, and expects that it is allocated by spdk_bdev_io_get_buf(). Bdev perf tool also doesn't allocate read buffer and specifies expected read buffer size, and expects that it is allocated by spdk_bdev_io_get_buf(). When we support DIF insert and strip in iSCSI target, the read buffer size iSCSI initiator requests and the read buffer size iSCSI target requests will become different. Even after that, iSCSI initiator and iSCSI target will negotiate correctly not to cause buffer overflow in spdk_bdev_io_get_buf(), but if iSCSI initiator ignores the result of negotiation, iSCSI initiator can request read buffer size larger than the permitted maximum, and can cause failure in iSCSI target. This is very flagile and should be avoided. This patch do the following - Add the completion status of spdk_bdev_io_get_buf() to spdk_bdev_io_get_buf_cb(), - spdk_bdev_io_get_buf() calls spdk_bdev_io_get_buf_cb() by setting success to false, and return. - spdk_bdev_io_get_buf_cb() in each bdev module calls assert if success is false. Subsequent patches will process the case that success is false in spdk_bdev_io_get_buf_cb(). Change-Id: I76429a86e18a69aa085a353ac94743296d270b82 Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com> Reviewed-on: https://review.gerrithub.io/c/446045 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Changpeng Liu <changpeng.liu@intel.com> Reviewed-by: Ziye Yang <ziye.yang@intel.com> Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
2019-02-25 00:34:28 +00:00
spdk_bdev_io_get_buf(bdev_io, vbdev_ocf_get_buf_cb,
bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
break;
case SPDK_BDEV_IO_TYPE_WRITE:
case SPDK_BDEV_IO_TYPE_FLUSH:
case SPDK_BDEV_IO_TYPE_UNMAP:
io_handle(ch, bdev_io);
break;
case SPDK_BDEV_IO_TYPE_RESET:
case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
default:
SPDK_ERRLOG("Unknown I/O type %d\n", bdev_io->type);
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
break;
}
}
/* Called from bdev layer */
static bool
vbdev_ocf_io_type_supported(void *opaque, enum spdk_bdev_io_type io_type)
{
struct vbdev_ocf *vbdev = opaque;
switch (io_type) {
case SPDK_BDEV_IO_TYPE_READ:
case SPDK_BDEV_IO_TYPE_WRITE:
case SPDK_BDEV_IO_TYPE_FLUSH:
case SPDK_BDEV_IO_TYPE_UNMAP:
return spdk_bdev_io_type_supported(vbdev->core.bdev, io_type);
case SPDK_BDEV_IO_TYPE_RESET:
case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
default:
return false;
}
}
/* Called from bdev layer */
static struct spdk_io_channel *
vbdev_ocf_get_io_channel(void *opaque)
{
struct vbdev_ocf *bdev = opaque;
return spdk_get_io_channel(bdev);
}
static int
vbdev_ocf_dump_info_json(void *opaque, struct spdk_json_write_ctx *w)
{
struct vbdev_ocf *vbdev = opaque;
spdk_json_write_named_string(w, "cache_device", vbdev->cache.name);
spdk_json_write_named_string(w, "core_device", vbdev->core.name);
spdk_json_write_named_string(w, "mode",
ocf_get_cache_modename(ocf_cache_get_mode(vbdev->ocf_cache)));
spdk_json_write_named_uint32(w, "cache_line_size",
ocf_get_cache_line_size(vbdev->ocf_cache));
spdk_json_write_named_bool(w, "metadata_volatile",
vbdev->cfg.cache.metadata_volatile);
return 0;
}
static void
vbdev_ocf_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
struct vbdev_ocf *vbdev = bdev->ctxt;
spdk_json_write_object_begin(w);
spdk_json_write_named_string(w, "method", "bdev_ocf_create");
spdk_json_write_named_object_begin(w, "params");
spdk_json_write_named_string(w, "name", vbdev->name);
spdk_json_write_named_string(w, "mode",
ocf_get_cache_modename(ocf_cache_get_mode(vbdev->ocf_cache)));
spdk_json_write_named_uint32(w, "cache_line_size",
ocf_get_cache_line_size(vbdev->ocf_cache));
spdk_json_write_named_string(w, "cache_bdev_name", vbdev->cache.name);
spdk_json_write_named_string(w, "core_bdev_name", vbdev->core.name);
spdk_json_write_object_end(w);
spdk_json_write_object_end(w);
}
/* Cache vbdev function table
* Used by bdev layer */
static struct spdk_bdev_fn_table cache_dev_fn_table = {
.destruct = vbdev_ocf_destruct,
.io_type_supported = vbdev_ocf_io_type_supported,
.submit_request = vbdev_ocf_submit_request,
.get_io_channel = vbdev_ocf_get_io_channel,
.write_config_json = vbdev_ocf_write_json_config,
.dump_info_json = vbdev_ocf_dump_info_json,
};
/* Poller function for the OCF queue
* We execute OCF requests here synchronously */
static int
queue_poll(void *opaque)
{
struct vbdev_ocf_qctx *qctx = opaque;
uint32_t iono = ocf_queue_pending_io(qctx->queue);
int i, max = spdk_min(32, iono);
for (i = 0; i < max; i++) {
ocf_queue_run_single(qctx->queue);
}
if (iono > 0) {
return SPDK_POLLER_BUSY;
} else {
return SPDK_POLLER_IDLE;
}
}
/* Called during ocf_submit_io, ocf_purge*
* and any other requests that need to submit io */
static void
vbdev_ocf_ctx_queue_kick(ocf_queue_t q)
{
}
/* OCF queue deinitialization
* Called at ocf_cache_stop */
static void
vbdev_ocf_ctx_queue_stop(ocf_queue_t q)
{
struct vbdev_ocf_qctx *qctx = ocf_queue_get_priv(q);
if (qctx) {
spdk_put_io_channel(qctx->cache_ch);
spdk_put_io_channel(qctx->core_ch);
spdk_poller_unregister(&qctx->poller);
if (qctx->allocated) {
free(qctx);
}
}
}
/* Queue ops is an interface for running queue thread
* stop() operation in called just before queue gets destroyed */
const struct ocf_queue_ops queue_ops = {
.kick_sync = vbdev_ocf_ctx_queue_kick,
.kick = vbdev_ocf_ctx_queue_kick,
.stop = vbdev_ocf_ctx_queue_stop,
};
/* Called on cache vbdev creation at every thread
* We allocate OCF queues here and SPDK poller for it */
static int
io_device_create_cb(void *io_device, void *ctx_buf)
{
struct vbdev_ocf *vbdev = io_device;
struct vbdev_ocf_qctx *qctx = ctx_buf;
int rc;
ocf: lock queue list during create/delete OCF queue list needs to be managed synchronously. This patch uses our own mutex to achieve that because we cannot rely on ocf_mngt_cache_lock() as it may produce deadlocks when using cleaner. Alternative way would be to use trylock, but we need to register a poller for it and do locking concurently which doesn't seem to be possible in callbacks of io channel. We agreed with OCF team that we are going to change this part when OCF will deliver safe ocf_mngt_cache_lock() function. This patch fixes a very rare failure on our CI that looked like this: ``` 04:33:02 vbdev_ocf.c: 134:stop_vbdev: *NOTICE*: Not stopping cache instance 'Malloc0' because it is referenced by other OCF bdev 04:33:03 MalCache1: Core core2 successfully removed 04:33:03 MalCache1: Stopping cache 04:33:03 MalCache1: Done saving cache state! 04:33:03 src/ocf/mngt/ocf_mngt_cache.c:1576:2: runtime error: pointer index expression with base 0x000000000000 overflowed to 0xffffffffffffffa8 04:33:03 #0 0x7f3c52d54c26 in _ocf_mngt_cache_stop src/ocf/mngt/ocf_mngt_cache.c:1576 04:33:03 #1 0x7f3c52d5579f in ocf_mngt_cache_stop src/ocf/mngt/ocf_mngt_cache.c:1657 04:33:03 #2 0x7f3c52cbe9f4 in stop_vbdev /var/jenkins/workspace/NVMe_tests/nvme_phy_autotest/spdk/lib/bdev/ocf/vbdev_ocf.c:147 04:33:03 #3 0x7f3c52cbf4a0 in vbdev_ocf_destruct /var/jenkins/workspace/NVMe_tests/nvme_phy_autotest/spdk/lib/bdev/ocf/vbdev_ocf.c:216 ``` Change-Id: Id6fafb444958f3becdc480e44762074c6c081e1f Signed-off-by: Vitaliy Mysak <vitaliy.mysak@intel.com> Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/450682 Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com> Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
2019-04-23 22:34:25 +00:00
rc = vbdev_ocf_queue_create(vbdev->ocf_cache, &qctx->queue, &queue_ops);
if (rc) {
return rc;
}
ocf_queue_set_priv(qctx->queue, qctx);
qctx->vbdev = vbdev;
qctx->cache_ch = spdk_bdev_get_io_channel(vbdev->cache.desc);
qctx->core_ch = spdk_bdev_get_io_channel(vbdev->core.desc);
qctx->poller = SPDK_POLLER_REGISTER(queue_poll, qctx, 0);
return rc;
}
/* Called per thread
* Put OCF queue and relaunch poller with new context to finish pending requests */
static void
io_device_destroy_cb(void *io_device, void *ctx_buf)
{
/* Making a copy of context to use it after io channel will be destroyed */
struct vbdev_ocf_qctx *copy = malloc(sizeof(*copy));
struct vbdev_ocf_qctx *qctx = ctx_buf;
if (copy) {
ocf_queue_set_priv(qctx->queue, copy);
memcpy(copy, qctx, sizeof(*copy));
spdk_poller_unregister(&qctx->poller);
copy->poller = SPDK_POLLER_REGISTER(queue_poll, copy, 0);
copy->allocated = true;
} else {
SPDK_ERRLOG("Unable to stop OCF queue properly: %s\n",
spdk_strerror(ENOMEM));
}
ocf: lock queue list during create/delete OCF queue list needs to be managed synchronously. This patch uses our own mutex to achieve that because we cannot rely on ocf_mngt_cache_lock() as it may produce deadlocks when using cleaner. Alternative way would be to use trylock, but we need to register a poller for it and do locking concurently which doesn't seem to be possible in callbacks of io channel. We agreed with OCF team that we are going to change this part when OCF will deliver safe ocf_mngt_cache_lock() function. This patch fixes a very rare failure on our CI that looked like this: ``` 04:33:02 vbdev_ocf.c: 134:stop_vbdev: *NOTICE*: Not stopping cache instance 'Malloc0' because it is referenced by other OCF bdev 04:33:03 MalCache1: Core core2 successfully removed 04:33:03 MalCache1: Stopping cache 04:33:03 MalCache1: Done saving cache state! 04:33:03 src/ocf/mngt/ocf_mngt_cache.c:1576:2: runtime error: pointer index expression with base 0x000000000000 overflowed to 0xffffffffffffffa8 04:33:03 #0 0x7f3c52d54c26 in _ocf_mngt_cache_stop src/ocf/mngt/ocf_mngt_cache.c:1576 04:33:03 #1 0x7f3c52d5579f in ocf_mngt_cache_stop src/ocf/mngt/ocf_mngt_cache.c:1657 04:33:03 #2 0x7f3c52cbe9f4 in stop_vbdev /var/jenkins/workspace/NVMe_tests/nvme_phy_autotest/spdk/lib/bdev/ocf/vbdev_ocf.c:147 04:33:03 #3 0x7f3c52cbf4a0 in vbdev_ocf_destruct /var/jenkins/workspace/NVMe_tests/nvme_phy_autotest/spdk/lib/bdev/ocf/vbdev_ocf.c:216 ``` Change-Id: Id6fafb444958f3becdc480e44762074c6c081e1f Signed-off-by: Vitaliy Mysak <vitaliy.mysak@intel.com> Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/450682 Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com> Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
2019-04-23 22:34:25 +00:00
vbdev_ocf_queue_put(qctx->queue);
}
/* OCF management queue deinitialization */
static void
vbdev_ocf_ctx_mngt_queue_stop(ocf_queue_t q)
{
struct spdk_poller *poller = ocf_queue_get_priv(q);
if (poller) {
spdk_poller_unregister(&poller);
}
}
static int
mngt_queue_poll(void *opaque)
{
ocf_queue_t q = opaque;
uint32_t iono = ocf_queue_pending_io(q);
int i, max = spdk_min(32, iono);
for (i = 0; i < max; i++) {
ocf_queue_run_single(q);
}
if (iono > 0) {
return SPDK_POLLER_BUSY;
} else {
return SPDK_POLLER_IDLE;
}
}
static void
vbdev_ocf_ctx_mngt_queue_kick(ocf_queue_t q)
{
}
/* Queue ops is an interface for running queue thread
* stop() operation in called just before queue gets destroyed */
const struct ocf_queue_ops mngt_queue_ops = {
.kick_sync = NULL,
.kick = vbdev_ocf_ctx_mngt_queue_kick,
.stop = vbdev_ocf_ctx_mngt_queue_stop,
};
static void
vbdev_ocf_mngt_exit(struct vbdev_ocf *vbdev, vbdev_ocf_mngt_fn *rollback_path, int rc)
{
vbdev->state.starting = false;
vbdev_ocf_mngt_stop(vbdev, rollback_path, rc);
}
/* Create exported spdk object */
static void
finish_register(struct vbdev_ocf *vbdev)
{
int result;
/* Copy properties of the base bdev */
vbdev->exp_bdev.blocklen = vbdev->core.bdev->blocklen;
vbdev->exp_bdev.write_cache = vbdev->core.bdev->write_cache;
vbdev->exp_bdev.required_alignment = vbdev->core.bdev->required_alignment;
vbdev->exp_bdev.name = vbdev->name;
vbdev->exp_bdev.product_name = "SPDK OCF";
vbdev->exp_bdev.blockcnt = vbdev->core.bdev->blockcnt;
vbdev->exp_bdev.ctxt = vbdev;
vbdev->exp_bdev.fn_table = &cache_dev_fn_table;
vbdev->exp_bdev.module = &ocf_if;
/* Finally register vbdev in SPDK */
spdk_io_device_register(vbdev, io_device_create_cb, io_device_destroy_cb,
sizeof(struct vbdev_ocf_qctx), vbdev->name);
result = spdk_bdev_register(&vbdev->exp_bdev);
if (result) {
SPDK_ERRLOG("Could not register exposed bdev %s\n",
vbdev->name);
vbdev_ocf_mngt_exit(vbdev, unregister_path_dirty, result);
return;
} else {
vbdev->state.started = true;
}
vbdev_ocf_mngt_continue(vbdev, result);
}
ocf: use vbdev_ocf_mngt_ interface in register path This patch is a preparation for adopting ocf_cache_trylock() function. Register OCF bdev path uses vbdev_ocf_mngt_ interface now Register stays synchronous blocking operation in this patch, but with adoption of ocf_cache_trylock() function, register will be asynchronous, in which case we want to use mngt_ interface. This series of OCF patches will enable WriteBack support for OCF bdev. There is a lot of preparation before we will be able to actually implement WriteBack. Dependencies look like this: - WriteBack - Cleaner - trylock - Persistent metadata - asynchronous management API Cleaner is a background agent that does synchronization of data between cache and its cores. Cleaner usually runs every ~30 seconds to perform cleaning. The synchronization is a simmilar operation to OCF management flushes. We need cleaner for WriteBack because only WriteBack mode produces dirty data that cleaner needs to deal with. Cleaner requires adopting trylock() because in current version cleaner uses management lock when performs cleaning, which may lead to deadlocks if cleaner runs on the same thread as management operations. Peristent metadata is functionality of OCF that allows to restore cache state after shutdown We need persistent metadata in context of shutdown with some data being dirty which may only happen when using WriteBack mode Support for persistent metadata requieres asynchronous OCF API because in current version we will have a deadlock during metadata initialization because it is required to be done on different thread than cache initialization. Change-Id: I45d84a5fa0c96581d522050dad186b06d489226e Signed-off-by: Vitaliy Mysak <vitaliy.mysak@intel.com> Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/455225 Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com> Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
2019-05-20 22:39:15 +00:00
static void
add_core_cmpl(ocf_cache_t cache, ocf_core_t core, void *priv, int error)
{
struct vbdev_ocf *vbdev = priv;
ocf_mngt_cache_unlock(cache);
if (error) {
SPDK_ERRLOG("Error %d, failed to add core device to cache instance %s,"
"starting rollback\n", error, vbdev->name);
vbdev_ocf_mngt_exit(vbdev, unregister_path_dirty, error);
return;
} else {
vbdev->ocf_core = core;
}
vbdev_ocf_mngt_continue(vbdev, error);
}
/* Try to lock cache, then add core */
static void
add_core_cache_lock_cmpl(ocf_cache_t cache, void *priv, int error)
{
struct vbdev_ocf *vbdev = (struct vbdev_ocf *)priv;
if (error) {
SPDK_ERRLOG("Error %d, can not lock cache instance %s,"
"starting rollback\n", error, vbdev->name);
vbdev_ocf_mngt_exit(vbdev, unregister_path_dirty, error);
}
ocf_mngt_cache_add_core(vbdev->ocf_cache, &vbdev->cfg.core, add_core_cmpl, vbdev);
}
/* Add core for existing OCF cache instance */
static void
add_core(struct vbdev_ocf *vbdev)
{
ocf_mngt_cache_lock(vbdev->ocf_cache, add_core_cache_lock_cmpl, vbdev);
}
static void
start_cache_cmpl(ocf_cache_t cache, void *priv, int error)
{
struct vbdev_ocf *vbdev = priv;
uint64_t mem_needed;
ocf_mngt_cache_unlock(cache);
if (error) {
SPDK_ERRLOG("Error %d during start cache %s, starting rollback\n",
error, vbdev->name);
if (error == -OCF_ERR_NO_MEM) {
ocf_mngt_get_ram_needed(cache, &vbdev->cfg.attach.device, &mem_needed);
SPDK_NOTICELOG("Try to increase hugepage memory size or cache line size. "
"For your configuration:\nDevice size: %"PRIu64" bytes\n"
"Cache line size: %"PRIu64" bytes\nFree memory needed to start "
"cache: %"PRIu64" bytes\n", vbdev->cache.bdev->blockcnt *
vbdev->cache.bdev->blocklen, vbdev->cfg.cache.cache_line_size,
mem_needed);
}
vbdev_ocf_mngt_exit(vbdev, unregister_path_dirty, error);
return;
}
vbdev_ocf_mngt_continue(vbdev, error);
}
static int
create_management_queue(struct vbdev_ocf *vbdev)
{
struct spdk_poller *mngt_poller;
int rc;
rc = vbdev_ocf_queue_create(vbdev->ocf_cache, &vbdev->cache_ctx->mngt_queue, &mngt_queue_ops);
if (rc) {
SPDK_ERRLOG("Unable to create mngt_queue: %d\n", rc);
return rc;
}
mngt_poller = SPDK_POLLER_REGISTER(mngt_queue_poll, vbdev->cache_ctx->mngt_queue, 100);
if (mngt_poller == NULL) {
SPDK_ERRLOG("Unable to initiate mngt request: %s", spdk_strerror(ENOMEM));
return -ENOMEM;
}
ocf_queue_set_priv(vbdev->cache_ctx->mngt_queue, mngt_poller);
ocf_mngt_cache_set_mngt_queue(vbdev->ocf_cache, vbdev->cache_ctx->mngt_queue);
return 0;
}
/* Start OCF cache, attach caching device */
ocf: use vbdev_ocf_mngt_ interface in register path This patch is a preparation for adopting ocf_cache_trylock() function. Register OCF bdev path uses vbdev_ocf_mngt_ interface now Register stays synchronous blocking operation in this patch, but with adoption of ocf_cache_trylock() function, register will be asynchronous, in which case we want to use mngt_ interface. This series of OCF patches will enable WriteBack support for OCF bdev. There is a lot of preparation before we will be able to actually implement WriteBack. Dependencies look like this: - WriteBack - Cleaner - trylock - Persistent metadata - asynchronous management API Cleaner is a background agent that does synchronization of data between cache and its cores. Cleaner usually runs every ~30 seconds to perform cleaning. The synchronization is a simmilar operation to OCF management flushes. We need cleaner for WriteBack because only WriteBack mode produces dirty data that cleaner needs to deal with. Cleaner requires adopting trylock() because in current version cleaner uses management lock when performs cleaning, which may lead to deadlocks if cleaner runs on the same thread as management operations. Peristent metadata is functionality of OCF that allows to restore cache state after shutdown We need persistent metadata in context of shutdown with some data being dirty which may only happen when using WriteBack mode Support for persistent metadata requieres asynchronous OCF API because in current version we will have a deadlock during metadata initialization because it is required to be done on different thread than cache initialization. Change-Id: I45d84a5fa0c96581d522050dad186b06d489226e Signed-off-by: Vitaliy Mysak <vitaliy.mysak@intel.com> Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/455225 Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com> Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
2019-05-20 22:39:15 +00:00
static void
start_cache(struct vbdev_ocf *vbdev)
{
ocf_cache_t existing;
uint32_t cache_block_size = vbdev->cache.bdev->blocklen;
uint32_t core_block_size = vbdev->core.bdev->blocklen;
int rc;
if (is_ocf_cache_running(vbdev)) {
vbdev_ocf_mngt_stop(vbdev, NULL, -EALREADY);
ocf: use vbdev_ocf_mngt_ interface in register path This patch is a preparation for adopting ocf_cache_trylock() function. Register OCF bdev path uses vbdev_ocf_mngt_ interface now Register stays synchronous blocking operation in this patch, but with adoption of ocf_cache_trylock() function, register will be asynchronous, in which case we want to use mngt_ interface. This series of OCF patches will enable WriteBack support for OCF bdev. There is a lot of preparation before we will be able to actually implement WriteBack. Dependencies look like this: - WriteBack - Cleaner - trylock - Persistent metadata - asynchronous management API Cleaner is a background agent that does synchronization of data between cache and its cores. Cleaner usually runs every ~30 seconds to perform cleaning. The synchronization is a simmilar operation to OCF management flushes. We need cleaner for WriteBack because only WriteBack mode produces dirty data that cleaner needs to deal with. Cleaner requires adopting trylock() because in current version cleaner uses management lock when performs cleaning, which may lead to deadlocks if cleaner runs on the same thread as management operations. Peristent metadata is functionality of OCF that allows to restore cache state after shutdown We need persistent metadata in context of shutdown with some data being dirty which may only happen when using WriteBack mode Support for persistent metadata requieres asynchronous OCF API because in current version we will have a deadlock during metadata initialization because it is required to be done on different thread than cache initialization. Change-Id: I45d84a5fa0c96581d522050dad186b06d489226e Signed-off-by: Vitaliy Mysak <vitaliy.mysak@intel.com> Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/455225 Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com> Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
2019-05-20 22:39:15 +00:00
return;
}
if (cache_block_size > core_block_size) {
SPDK_ERRLOG("Cache bdev block size (%d) is bigger then core bdev block size (%d)\n",
cache_block_size, core_block_size);
vbdev_ocf_mngt_exit(vbdev, unregister_path_dirty, -EINVAL);
return;
}
existing = get_other_cache_instance(vbdev);
if (existing) {
SPDK_NOTICELOG("OCF bdev %s connects to existing cache device %s\n",
vbdev->name, vbdev->cache.name);
vbdev->ocf_cache = existing;
bdev/ocf: take additional reference for ocf_cache Fixes #1498 When shutting down the application, it was possible to reference stale ocf_cache pointer. This was the case when two or more vbdev_ocf devices were based on top of single cache bdev. This issue did not occur outside of the shutdown case, since RPC only allows deletion of the vbdev_ocf. This erases on disk metadata and next run of the application, would not detect such vbdev_ocf. Shutdown meanwhile works different, by first stopping the instance of running "ocf_mngt_cache" and later detaching "core" devices (the ones being cached). This prevented erasing the on disk metadata and allowed for restarted application to detect vbdev_ocf. See patch (1292ef2) for details. Since references to ocf_cache are copied between vbdev_ocf [see start_cache()], the reference count inside ocf_cache was limited to original ocf_mngt_cache_start() and management queue creation. First call into ocf_mngt_cache_stop() released all references to ocf_cache. Leaving other vbdev_ocfs pointing to released memory. This patch works around this issue by increasing ref cnt on ocf_cache for each vbdev based on top of it. It allows to call into ocf_mngt_cache_stop(), but not release the memory for ocf_cache until last vbdev. Note: A proper redesign here is in order: - either rearranging structures to be based around single ocf_cache, rather than multiple vbdev_ocf instances - better use of OCF API to reduce book keeping logic in vbdev There are plans to implement detach/attach in RPC, so it should be a focus during that effort. Signed-off-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com> Change-Id: I560a7fbb1c052bf53970e655bdb60803c561a252 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/3574 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Vitaliy Mysak <vitaliy.mysak@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com>
2020-07-28 14:43:06 +00:00
ocf_mngt_cache_get(vbdev->ocf_cache);
vbdev->cache_ctx = ocf_cache_get_priv(existing);
vbdev_ocf_cache_ctx_get(vbdev->cache_ctx);
ocf: use vbdev_ocf_mngt_ interface in register path This patch is a preparation for adopting ocf_cache_trylock() function. Register OCF bdev path uses vbdev_ocf_mngt_ interface now Register stays synchronous blocking operation in this patch, but with adoption of ocf_cache_trylock() function, register will be asynchronous, in which case we want to use mngt_ interface. This series of OCF patches will enable WriteBack support for OCF bdev. There is a lot of preparation before we will be able to actually implement WriteBack. Dependencies look like this: - WriteBack - Cleaner - trylock - Persistent metadata - asynchronous management API Cleaner is a background agent that does synchronization of data between cache and its cores. Cleaner usually runs every ~30 seconds to perform cleaning. The synchronization is a simmilar operation to OCF management flushes. We need cleaner for WriteBack because only WriteBack mode produces dirty data that cleaner needs to deal with. Cleaner requires adopting trylock() because in current version cleaner uses management lock when performs cleaning, which may lead to deadlocks if cleaner runs on the same thread as management operations. Peristent metadata is functionality of OCF that allows to restore cache state after shutdown We need persistent metadata in context of shutdown with some data being dirty which may only happen when using WriteBack mode Support for persistent metadata requieres asynchronous OCF API because in current version we will have a deadlock during metadata initialization because it is required to be done on different thread than cache initialization. Change-Id: I45d84a5fa0c96581d522050dad186b06d489226e Signed-off-by: Vitaliy Mysak <vitaliy.mysak@intel.com> Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/455225 Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com> Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
2019-05-20 22:39:15 +00:00
vbdev_ocf_mngt_continue(vbdev, 0);
return;
}
vbdev->cache_ctx = calloc(1, sizeof(struct vbdev_ocf_cache_ctx));
if (vbdev->cache_ctx == NULL) {
vbdev_ocf_mngt_exit(vbdev, unregister_path_dirty, -ENOMEM);
return;
}
vbdev_ocf_cache_ctx_get(vbdev->cache_ctx);
ocf: lock queue list during create/delete OCF queue list needs to be managed synchronously. This patch uses our own mutex to achieve that because we cannot rely on ocf_mngt_cache_lock() as it may produce deadlocks when using cleaner. Alternative way would be to use trylock, but we need to register a poller for it and do locking concurently which doesn't seem to be possible in callbacks of io channel. We agreed with OCF team that we are going to change this part when OCF will deliver safe ocf_mngt_cache_lock() function. This patch fixes a very rare failure on our CI that looked like this: ``` 04:33:02 vbdev_ocf.c: 134:stop_vbdev: *NOTICE*: Not stopping cache instance 'Malloc0' because it is referenced by other OCF bdev 04:33:03 MalCache1: Core core2 successfully removed 04:33:03 MalCache1: Stopping cache 04:33:03 MalCache1: Done saving cache state! 04:33:03 src/ocf/mngt/ocf_mngt_cache.c:1576:2: runtime error: pointer index expression with base 0x000000000000 overflowed to 0xffffffffffffffa8 04:33:03 #0 0x7f3c52d54c26 in _ocf_mngt_cache_stop src/ocf/mngt/ocf_mngt_cache.c:1576 04:33:03 #1 0x7f3c52d5579f in ocf_mngt_cache_stop src/ocf/mngt/ocf_mngt_cache.c:1657 04:33:03 #2 0x7f3c52cbe9f4 in stop_vbdev /var/jenkins/workspace/NVMe_tests/nvme_phy_autotest/spdk/lib/bdev/ocf/vbdev_ocf.c:147 04:33:03 #3 0x7f3c52cbf4a0 in vbdev_ocf_destruct /var/jenkins/workspace/NVMe_tests/nvme_phy_autotest/spdk/lib/bdev/ocf/vbdev_ocf.c:216 ``` Change-Id: Id6fafb444958f3becdc480e44762074c6c081e1f Signed-off-by: Vitaliy Mysak <vitaliy.mysak@intel.com> Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/450682 Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com> Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
2019-04-23 22:34:25 +00:00
pthread_mutex_init(&vbdev->cache_ctx->lock, NULL);
rc = ocf_mngt_cache_start(vbdev_ocf_ctx, &vbdev->ocf_cache, &vbdev->cfg.cache, NULL);
if (rc) {
SPDK_ERRLOG("Could not start cache %s: %d\n", vbdev->name, rc);
vbdev_ocf_mngt_exit(vbdev, unregister_path_dirty, rc);
ocf: use vbdev_ocf_mngt_ interface in register path This patch is a preparation for adopting ocf_cache_trylock() function. Register OCF bdev path uses vbdev_ocf_mngt_ interface now Register stays synchronous blocking operation in this patch, but with adoption of ocf_cache_trylock() function, register will be asynchronous, in which case we want to use mngt_ interface. This series of OCF patches will enable WriteBack support for OCF bdev. There is a lot of preparation before we will be able to actually implement WriteBack. Dependencies look like this: - WriteBack - Cleaner - trylock - Persistent metadata - asynchronous management API Cleaner is a background agent that does synchronization of data between cache and its cores. Cleaner usually runs every ~30 seconds to perform cleaning. The synchronization is a simmilar operation to OCF management flushes. We need cleaner for WriteBack because only WriteBack mode produces dirty data that cleaner needs to deal with. Cleaner requires adopting trylock() because in current version cleaner uses management lock when performs cleaning, which may lead to deadlocks if cleaner runs on the same thread as management operations. Peristent metadata is functionality of OCF that allows to restore cache state after shutdown We need persistent metadata in context of shutdown with some data being dirty which may only happen when using WriteBack mode Support for persistent metadata requieres asynchronous OCF API because in current version we will have a deadlock during metadata initialization because it is required to be done on different thread than cache initialization. Change-Id: I45d84a5fa0c96581d522050dad186b06d489226e Signed-off-by: Vitaliy Mysak <vitaliy.mysak@intel.com> Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/455225 Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com> Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
2019-05-20 22:39:15 +00:00
return;
}
bdev/ocf: take additional reference for ocf_cache Fixes #1498 When shutting down the application, it was possible to reference stale ocf_cache pointer. This was the case when two or more vbdev_ocf devices were based on top of single cache bdev. This issue did not occur outside of the shutdown case, since RPC only allows deletion of the vbdev_ocf. This erases on disk metadata and next run of the application, would not detect such vbdev_ocf. Shutdown meanwhile works different, by first stopping the instance of running "ocf_mngt_cache" and later detaching "core" devices (the ones being cached). This prevented erasing the on disk metadata and allowed for restarted application to detect vbdev_ocf. See patch (1292ef2) for details. Since references to ocf_cache are copied between vbdev_ocf [see start_cache()], the reference count inside ocf_cache was limited to original ocf_mngt_cache_start() and management queue creation. First call into ocf_mngt_cache_stop() released all references to ocf_cache. Leaving other vbdev_ocfs pointing to released memory. This patch works around this issue by increasing ref cnt on ocf_cache for each vbdev based on top of it. It allows to call into ocf_mngt_cache_stop(), but not release the memory for ocf_cache until last vbdev. Note: A proper redesign here is in order: - either rearranging structures to be based around single ocf_cache, rather than multiple vbdev_ocf instances - better use of OCF API to reduce book keeping logic in vbdev There are plans to implement detach/attach in RPC, so it should be a focus during that effort. Signed-off-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com> Change-Id: I560a7fbb1c052bf53970e655bdb60803c561a252 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/3574 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Vitaliy Mysak <vitaliy.mysak@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com>
2020-07-28 14:43:06 +00:00
ocf_mngt_cache_get(vbdev->ocf_cache);
ocf_cache_set_priv(vbdev->ocf_cache, vbdev->cache_ctx);
rc = create_management_queue(vbdev);
if (rc) {
SPDK_ERRLOG("Unable to create mngt_queue: %d\n", rc);
vbdev_ocf_mngt_exit(vbdev, unregister_path_dirty, rc);
return;
}
if (vbdev->cfg.loadq) {
ocf_mngt_cache_load(vbdev->ocf_cache, &vbdev->cfg.attach, start_cache_cmpl, vbdev);
} else {
ocf_mngt_cache_attach(vbdev->ocf_cache, &vbdev->cfg.attach, start_cache_cmpl, vbdev);
}
}
ocf: use vbdev_ocf_mngt_ interface in register path This patch is a preparation for adopting ocf_cache_trylock() function. Register OCF bdev path uses vbdev_ocf_mngt_ interface now Register stays synchronous blocking operation in this patch, but with adoption of ocf_cache_trylock() function, register will be asynchronous, in which case we want to use mngt_ interface. This series of OCF patches will enable WriteBack support for OCF bdev. There is a lot of preparation before we will be able to actually implement WriteBack. Dependencies look like this: - WriteBack - Cleaner - trylock - Persistent metadata - asynchronous management API Cleaner is a background agent that does synchronization of data between cache and its cores. Cleaner usually runs every ~30 seconds to perform cleaning. The synchronization is a simmilar operation to OCF management flushes. We need cleaner for WriteBack because only WriteBack mode produces dirty data that cleaner needs to deal with. Cleaner requires adopting trylock() because in current version cleaner uses management lock when performs cleaning, which may lead to deadlocks if cleaner runs on the same thread as management operations. Peristent metadata is functionality of OCF that allows to restore cache state after shutdown We need persistent metadata in context of shutdown with some data being dirty which may only happen when using WriteBack mode Support for persistent metadata requieres asynchronous OCF API because in current version we will have a deadlock during metadata initialization because it is required to be done on different thread than cache initialization. Change-Id: I45d84a5fa0c96581d522050dad186b06d489226e Signed-off-by: Vitaliy Mysak <vitaliy.mysak@intel.com> Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/455225 Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com> Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
2019-05-20 22:39:15 +00:00
/* Procedures called during register operation */
vbdev_ocf_mngt_fn register_path[] = {
start_cache,
add_core,
finish_register,
NULL
};
/* Start cache instance and register OCF bdev */
ocf: use vbdev_ocf_mngt_ interface in register path This patch is a preparation for adopting ocf_cache_trylock() function. Register OCF bdev path uses vbdev_ocf_mngt_ interface now Register stays synchronous blocking operation in this patch, but with adoption of ocf_cache_trylock() function, register will be asynchronous, in which case we want to use mngt_ interface. This series of OCF patches will enable WriteBack support for OCF bdev. There is a lot of preparation before we will be able to actually implement WriteBack. Dependencies look like this: - WriteBack - Cleaner - trylock - Persistent metadata - asynchronous management API Cleaner is a background agent that does synchronization of data between cache and its cores. Cleaner usually runs every ~30 seconds to perform cleaning. The synchronization is a simmilar operation to OCF management flushes. We need cleaner for WriteBack because only WriteBack mode produces dirty data that cleaner needs to deal with. Cleaner requires adopting trylock() because in current version cleaner uses management lock when performs cleaning, which may lead to deadlocks if cleaner runs on the same thread as management operations. Peristent metadata is functionality of OCF that allows to restore cache state after shutdown We need persistent metadata in context of shutdown with some data being dirty which may only happen when using WriteBack mode Support for persistent metadata requieres asynchronous OCF API because in current version we will have a deadlock during metadata initialization because it is required to be done on different thread than cache initialization. Change-Id: I45d84a5fa0c96581d522050dad186b06d489226e Signed-off-by: Vitaliy Mysak <vitaliy.mysak@intel.com> Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/455225 Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com> Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
2019-05-20 22:39:15 +00:00
static void
register_vbdev(struct vbdev_ocf *vbdev, vbdev_ocf_mngt_callback cb, void *cb_arg)
{
int rc;
ocf: use vbdev_ocf_mngt_ interface in register path This patch is a preparation for adopting ocf_cache_trylock() function. Register OCF bdev path uses vbdev_ocf_mngt_ interface now Register stays synchronous blocking operation in this patch, but with adoption of ocf_cache_trylock() function, register will be asynchronous, in which case we want to use mngt_ interface. This series of OCF patches will enable WriteBack support for OCF bdev. There is a lot of preparation before we will be able to actually implement WriteBack. Dependencies look like this: - WriteBack - Cleaner - trylock - Persistent metadata - asynchronous management API Cleaner is a background agent that does synchronization of data between cache and its cores. Cleaner usually runs every ~30 seconds to perform cleaning. The synchronization is a simmilar operation to OCF management flushes. We need cleaner for WriteBack because only WriteBack mode produces dirty data that cleaner needs to deal with. Cleaner requires adopting trylock() because in current version cleaner uses management lock when performs cleaning, which may lead to deadlocks if cleaner runs on the same thread as management operations. Peristent metadata is functionality of OCF that allows to restore cache state after shutdown We need persistent metadata in context of shutdown with some data being dirty which may only happen when using WriteBack mode Support for persistent metadata requieres asynchronous OCF API because in current version we will have a deadlock during metadata initialization because it is required to be done on different thread than cache initialization. Change-Id: I45d84a5fa0c96581d522050dad186b06d489226e Signed-off-by: Vitaliy Mysak <vitaliy.mysak@intel.com> Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/455225 Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com> Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
2019-05-20 22:39:15 +00:00
if (!(vbdev->core.attached && vbdev->cache.attached) || vbdev->state.started) {
cb(-EPERM, vbdev, cb_arg);
return;
}
vbdev->state.starting = true;
ocf: use vbdev_ocf_mngt_ interface in register path This patch is a preparation for adopting ocf_cache_trylock() function. Register OCF bdev path uses vbdev_ocf_mngt_ interface now Register stays synchronous blocking operation in this patch, but with adoption of ocf_cache_trylock() function, register will be asynchronous, in which case we want to use mngt_ interface. This series of OCF patches will enable WriteBack support for OCF bdev. There is a lot of preparation before we will be able to actually implement WriteBack. Dependencies look like this: - WriteBack - Cleaner - trylock - Persistent metadata - asynchronous management API Cleaner is a background agent that does synchronization of data between cache and its cores. Cleaner usually runs every ~30 seconds to perform cleaning. The synchronization is a simmilar operation to OCF management flushes. We need cleaner for WriteBack because only WriteBack mode produces dirty data that cleaner needs to deal with. Cleaner requires adopting trylock() because in current version cleaner uses management lock when performs cleaning, which may lead to deadlocks if cleaner runs on the same thread as management operations. Peristent metadata is functionality of OCF that allows to restore cache state after shutdown We need persistent metadata in context of shutdown with some data being dirty which may only happen when using WriteBack mode Support for persistent metadata requieres asynchronous OCF API because in current version we will have a deadlock during metadata initialization because it is required to be done on different thread than cache initialization. Change-Id: I45d84a5fa0c96581d522050dad186b06d489226e Signed-off-by: Vitaliy Mysak <vitaliy.mysak@intel.com> Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/455225 Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com> Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
2019-05-20 22:39:15 +00:00
rc = vbdev_ocf_mngt_start(vbdev, register_path, cb, cb_arg);
if (rc) {
cb(rc, vbdev, cb_arg);
}
}
/* Init OCF configuration options
* for core and cache devices */
static int
init_vbdev_config(struct vbdev_ocf *vbdev)
{
struct vbdev_ocf_config *cfg = &vbdev->cfg;
struct ocf_volume_uuid uuid;
ocf_volume_type_t type;
int ret;
/* Initialize OCF defaults first */
ocf_mngt_cache_attach_config_set_default(&cfg->attach);
ocf_mngt_cache_config_set_default(&cfg->cache);
ocf_mngt_core_config_set_default(&cfg->core);
snprintf(cfg->cache.name, sizeof(cfg->cache.name), "%s", vbdev->name);
snprintf(cfg->core.name, sizeof(cfg->core.name), "%s", vbdev->core.name);
cfg->attach.open_cores = false;
cfg->attach.device.perform_test = false;
cfg->attach.discard_on_start = false;
vbdev->cfg.cache.locked = true;
cfg->core.volume_type = SPDK_OBJECT;
if (vbdev->cfg.loadq) {
/* When doing cache_load(), we need to set try_add to true,
* otherwise OCF will interpret this core as new
* instead of the inactive one */
vbdev->cfg.core.try_add = true;
} else {
/* When cache is initialized as new, set force flag to true,
* to ignore warnings about existing metadata */
cfg->attach.force = true;
}
/* Serialize bdev names in OCF UUID to interpret on future loads
* Core UUID is a triple of (core name, vbdev name, cache name)
* Cache UUID is cache bdev name */
type = ocf_ctx_get_volume_type(vbdev_ocf_ctx, SPDK_OBJECT);
if (!type) {
SPDK_ERRLOG("Fail to get volume type\n");
return -EINVAL;
}
uuid.size = strlen(vbdev->cache.name) + 1;
uuid.data = vbdev->cache.name;
ret = ocf_volume_create(&cfg->attach.device.volume, type, &uuid);
if (ret) {
SPDK_ERRLOG("Fail to create volume\n");
return -EINVAL;
}
snprintf(vbdev->uuid, VBDEV_OCF_MD_MAX_LEN, "%s %s %s",
vbdev->core.name, vbdev->name, vbdev->cache.name);
cfg->core.uuid.size = strlen(vbdev->uuid) + 1;
cfg->core.uuid.data = vbdev->uuid;
vbdev->uuid[strlen(vbdev->core.name)] = 0;
vbdev->uuid[strlen(vbdev->core.name) + 1 + strlen(vbdev->name)] = 0;
return 0;
}
/* Allocate vbdev structure object and add it to the global list */
static int
init_vbdev(const char *vbdev_name,
const char *cache_mode_name,
const uint64_t cache_line_size,
const char *cache_name,
const char *core_name,
bool loadq)
{
struct vbdev_ocf *vbdev;
int rc = 0;
if (spdk_bdev_get_by_name(vbdev_name) || vbdev_ocf_get_by_name(vbdev_name)) {
SPDK_ERRLOG("Device with name '%s' already exists\n", vbdev_name);
return -EPERM;
}
vbdev = calloc(1, sizeof(*vbdev));
if (!vbdev) {
goto error_mem;
}
vbdev->name = strdup(vbdev_name);
if (!vbdev->name) {
goto error_mem;
}
vbdev->cache.name = strdup(cache_name);
if (!vbdev->cache.name) {
goto error_mem;
}
vbdev->core.name = strdup(core_name);
if (!vbdev->core.name) {
goto error_mem;
}
vbdev->cache.parent = vbdev;
vbdev->core.parent = vbdev;
vbdev->cache.is_cache = true;
vbdev->core.is_cache = false;
vbdev->cfg.loadq = loadq;
rc = init_vbdev_config(vbdev);
if (rc) {
SPDK_ERRLOG("Fail to init vbdev config\n");
goto error_free;
}
if (cache_mode_name) {
vbdev->cfg.cache.cache_mode
= ocf_get_cache_mode(cache_mode_name);
} else if (!loadq) { /* In load path it is OK to pass NULL as cache mode */
SPDK_ERRLOG("No cache mode specified\n");
rc = -EINVAL;
goto error_free;
}
if (vbdev->cfg.cache.cache_mode < 0) {
SPDK_ERRLOG("Incorrect cache mode '%s'\n", cache_mode_name);
rc = -EINVAL;
goto error_free;
}
ocf_cache_line_size_t set_cache_line_size = cache_line_size ?
(ocf_cache_line_size_t)cache_line_size * KiB :
ocf_cache_line_size_default;
if (set_cache_line_size == 0) {
SPDK_ERRLOG("Cache line size should be non-zero.\n");
rc = -EINVAL;
goto error_free;
}
vbdev->cfg.attach.cache_line_size = set_cache_line_size;
vbdev->cfg.cache.cache_line_size = set_cache_line_size;
TAILQ_INSERT_TAIL(&g_ocf_vbdev_head, vbdev, tailq);
return rc;
error_mem:
rc = -ENOMEM;
error_free:
free_vbdev(vbdev);
return rc;
}
/* This deprecation is likely to be removed, and the ocf support will remain
* in SPDK. See deprecation.md for more details.
*/
SPDK_LOG_DEPRECATION_REGISTER(bdev_ocf, "bdev_ocf support", "SPDK 23.09", 0);
/* Read configuration file at the start of SPDK application
* This adds vbdevs to global list if some mentioned in config */
static int
vbdev_ocf_init(void)
{
int status;
status = vbdev_ocf_ctx_init();
if (status) {
SPDK_ERRLOG("OCF ctx initialization failed with=%d\n", status);
return status;
}
status = vbdev_ocf_volume_init();
if (status) {
vbdev_ocf_ctx_cleanup();
SPDK_ERRLOG("OCF volume initialization failed with=%d\n", status);
return status;
}
return status;
}
/* Called after application shutdown started
* Release memory of allocated structures here */
static void
vbdev_ocf_module_fini(void)
{
struct vbdev_ocf *vbdev;
while ((vbdev = TAILQ_FIRST(&g_ocf_vbdev_head))) {
TAILQ_REMOVE(&g_ocf_vbdev_head, vbdev, tailq);
free_vbdev(vbdev);
}
vbdev_ocf_volume_cleanup();
vbdev_ocf_ctx_cleanup();
}
/* When base device gets unplugged this is called
* We will unregister cache vbdev here
* When cache device is removed, we delete every OCF bdev that used it */
static void
hotremove_cb(struct vbdev_ocf_base *base)
{
struct vbdev_ocf *vbdev;
if (!base->is_cache) {
if (base->parent->state.doing_finish) {
return;
}
SPDK_NOTICELOG("Deinitializing '%s' because its core device '%s' was removed\n",
base->parent->name, base->name);
vbdev_ocf_delete(base->parent, NULL, NULL);
return;
}
TAILQ_FOREACH(vbdev, &g_ocf_vbdev_head, tailq) {
if (vbdev->state.doing_finish) {
continue;
}
if (strcmp(base->name, vbdev->cache.name) == 0) {
SPDK_NOTICELOG("Deinitializing '%s' because"
" its cache device '%s' was removed\n",
vbdev->name, base->name);
vbdev_ocf_delete(vbdev, NULL, NULL);
}
}
}
static void
base_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
void *event_ctx)
{
switch (type) {
case SPDK_BDEV_EVENT_REMOVE:
if (event_ctx) {
hotremove_cb(event_ctx);
}
break;
default:
SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
break;
}
}
/* Open base SPDK bdev and claim it */
static int
attach_base(struct vbdev_ocf_base *base)
{
int status;
if (base->attached) {
return -EALREADY;
}
/* If base cache bdev was already opened by other vbdev,
* we just copy its descriptor here */
if (base->is_cache) {
struct vbdev_ocf_base *existing = get_other_cache_base(base);
if (existing) {
base->desc = existing->desc;
base->management_channel = existing->management_channel;
base->attached = true;
return 0;
}
}
status = spdk_bdev_open_ext(base->name, true, base_bdev_event_cb, base, &base->desc);
if (status) {
SPDK_ERRLOG("Unable to open device '%s' for writing\n", base->name);
return status;
}
status = spdk_bdev_module_claim_bdev(base->bdev, base->desc,
&ocf_if);
if (status) {
SPDK_ERRLOG("Unable to claim device '%s'\n", base->name);
spdk_bdev_close(base->desc);
return status;
}
base->management_channel = spdk_bdev_get_io_channel(base->desc);
if (!base->management_channel) {
SPDK_ERRLOG("Unable to get io channel '%s'\n", base->name);
spdk_bdev_module_release_bdev(base->bdev);
spdk_bdev_close(base->desc);
return -ENOMEM;
}
/* Save the thread where the base device is opened */
base->thread = spdk_get_thread();
base->attached = true;
return status;
}
/* Attach base bdevs */
static int
attach_base_bdevs(struct vbdev_ocf *vbdev,
struct spdk_bdev *cache_bdev,
struct spdk_bdev *core_bdev)
{
int rc = 0;
if (cache_bdev) {
vbdev->cache.bdev = cache_bdev;
rc |= attach_base(&vbdev->cache);
}
if (core_bdev) {
vbdev->core.bdev = core_bdev;
rc |= attach_base(&vbdev->core);
}
return rc;
}
/* Init and then start vbdev if all base devices are present */
void
vbdev_ocf_construct(const char *vbdev_name,
const char *cache_mode_name,
const uint64_t cache_line_size,
const char *cache_name,
const char *core_name,
bool loadq,
void (*cb)(int, struct vbdev_ocf *, void *),
void *cb_arg)
{
int rc;
struct spdk_bdev *cache_bdev = spdk_bdev_get_by_name(cache_name);
struct spdk_bdev *core_bdev = spdk_bdev_get_by_name(core_name);
struct vbdev_ocf *vbdev;
rc = init_vbdev(vbdev_name, cache_mode_name, cache_line_size, cache_name, core_name, loadq);
if (rc) {
cb(rc, NULL, cb_arg);
return;
}
vbdev = vbdev_ocf_get_by_name(vbdev_name);
if (vbdev == NULL) {
cb(-ENODEV, NULL, cb_arg);
return;
}
if (cache_bdev == NULL) {
SPDK_NOTICELOG("OCF bdev '%s' is waiting for cache device '%s' to connect\n",
vbdev->name, cache_name);
}
if (core_bdev == NULL) {
SPDK_NOTICELOG("OCF bdev '%s' is waiting for core device '%s' to connect\n",
vbdev->name, core_name);
}
rc = attach_base_bdevs(vbdev, cache_bdev, core_bdev);
if (rc) {
cb(rc, vbdev, cb_arg);
return;
}
if (core_bdev && cache_bdev) {
register_vbdev(vbdev, cb, cb_arg);
} else {
cb(0, vbdev, cb_arg);
}
}
/* Set new cache mode on OCF cache */
void
vbdev_ocf_set_cache_mode(struct vbdev_ocf *vbdev,
const char *cache_mode_name,
void (*cb)(int, struct vbdev_ocf *, void *),
void *cb_arg)
{
ocf_cache_t cache;
ocf_cache_mode_t cache_mode;
int rc;
cache = vbdev->ocf_cache;
cache_mode = ocf_get_cache_mode(cache_mode_name);
rc = ocf_mngt_cache_trylock(cache);
if (rc) {
cb(rc, vbdev, cb_arg);
return;
}
rc = ocf_mngt_cache_set_mode(cache, cache_mode);
ocf_mngt_cache_unlock(cache);
cb(rc, vbdev, cb_arg);
}
/* Set sequential cutoff parameters on OCF cache */
void
vbdev_ocf_set_seqcutoff(struct vbdev_ocf *vbdev, const char *policy_name, uint32_t threshold,
uint32_t promotion_count, void (*cb)(int, void *), void *cb_arg)
{
ocf_cache_t cache;
ocf_seq_cutoff_policy policy;
int rc;
cache = vbdev->ocf_cache;
policy = ocf_get_seqcutoff_policy(policy_name);
if (policy == ocf_seq_cutoff_policy_max) {
cb(OCF_ERR_INVAL, cb_arg);
return;
}
rc = ocf_mngt_cache_trylock(cache);
if (rc) {
cb(rc, cb_arg);
return;
}
rc = ocf_mngt_core_set_seq_cutoff_policy_all(cache, policy);
if (rc) {
goto end;
}
if (threshold) {
threshold = threshold * KiB;
rc = ocf_mngt_core_set_seq_cutoff_threshold_all(cache, threshold);
if (rc) {
goto end;
}
}
if (promotion_count) {
rc = ocf_mngt_core_set_seq_cutoff_promotion_count_all(cache, promotion_count);
}
end:
ocf_mngt_cache_unlock(cache);
cb(rc, cb_arg);
}
/* This called if new device is created in SPDK application
* If that device named as one of base bdevs of OCF vbdev,
* claim and open them */
static void
vbdev_ocf_examine(struct spdk_bdev *bdev)
{
const char *bdev_name = spdk_bdev_get_name(bdev);
struct vbdev_ocf *vbdev;
TAILQ_FOREACH(vbdev, &g_ocf_vbdev_head, tailq) {
if (vbdev->state.doing_finish) {
continue;
}
if (!strcmp(bdev_name, vbdev->cache.name)) {
attach_base_bdevs(vbdev, bdev, NULL);
continue;
}
if (!strcmp(bdev_name, vbdev->core.name)) {
attach_base_bdevs(vbdev, NULL, bdev);
break;
}
}
spdk_bdev_module_examine_done(&ocf_if);
}
struct metadata_probe_ctx {
struct vbdev_ocf_base base;
ocf_volume_t volume;
struct ocf_volume_uuid *core_uuids;
unsigned int uuid_count;
int result;
int refcnt;
};
static void
_examine_ctx_put(void *ctx)
{
struct spdk_bdev_desc *desc = ctx;
spdk_bdev_close(desc);
}
static void
examine_ctx_put(struct metadata_probe_ctx *ctx)
{
unsigned int i;
ctx->refcnt--;
if (ctx->refcnt > 0) {
return;
}
if (ctx->result) {
SPDK_ERRLOG("OCF metadata probe for bdev '%s' failed with %d\n",
spdk_bdev_get_name(ctx->base.bdev), ctx->result);
}
if (ctx->base.desc) {
/* Close the underlying bdev on its same opened thread. */
if (ctx->base.thread && ctx->base.thread != spdk_get_thread()) {
spdk_thread_send_msg(ctx->base.thread, _examine_ctx_put, ctx->base.desc);
} else {
spdk_bdev_close(ctx->base.desc);
}
}
if (ctx->volume) {
ocf_volume_destroy(ctx->volume);
}
if (ctx->core_uuids) {
for (i = 0; i < ctx->uuid_count; i++) {
free(ctx->core_uuids[i].data);
}
}
free(ctx->core_uuids);
examine_done(ctx->result, NULL, ctx->base.bdev);
free(ctx);
}
static void
metadata_probe_cb(void *priv, int rc,
struct ocf_metadata_probe_status *status)
{
struct metadata_probe_ctx *ctx = priv;
if (rc) {
/* -ENODATA means device does not have cache metadata on it */
if (rc != -OCF_ERR_NO_METADATA) {
ctx->result = rc;
}
}
examine_ctx_put(ctx);
}
/* This is called after vbdev_ocf_examine
* It allows to delay application initialization
* until all OCF bdevs get registered
* If vbdev has all of its base devices it starts asynchronously here
* We first check if bdev appears in configuration,
* if not we do metadata_probe() to create its configuration from bdev metadata */
static void
vbdev_ocf_examine_disk(struct spdk_bdev *bdev)
{
const char *bdev_name = spdk_bdev_get_name(bdev);
struct vbdev_ocf *vbdev;
struct metadata_probe_ctx *ctx;
bool created_from_config = false;
int rc;
examine_start(bdev);
TAILQ_FOREACH(vbdev, &g_ocf_vbdev_head, tailq) {
if (vbdev->state.doing_finish || vbdev->state.started) {
continue;
}
if (!strcmp(bdev_name, vbdev->cache.name)) {
examine_start(bdev);
register_vbdev(vbdev, examine_done, bdev);
created_from_config = true;
continue;
}
if (!strcmp(bdev_name, vbdev->core.name)) {
examine_start(bdev);
register_vbdev(vbdev, examine_done, bdev);
examine_done(0, NULL, bdev);
return;
}
}
/* If devices is discovered during config we do not check for metadata */
if (created_from_config) {
examine_done(0, NULL, bdev);
return;
}
/* Metadata probe path
* We create temporary OCF volume and a temporary base structure
* to use them for ocf_metadata_probe() and for bottom adapter IOs
* Then we get UUIDs of core devices an create configurations based on them */
ctx = calloc(1, sizeof(*ctx));
if (!ctx) {
examine_done(-ENOMEM, NULL, bdev);
return;
}
ctx->base.bdev = bdev;
ctx->refcnt = 1;
rc = spdk_bdev_open_ext(bdev_name, true, base_bdev_event_cb, NULL, &ctx->base.desc);
if (rc) {
ctx->result = rc;
examine_ctx_put(ctx);
return;
}
rc = ocf_ctx_volume_create(vbdev_ocf_ctx, &ctx->volume, NULL, SPDK_OBJECT);
if (rc) {
ctx->result = rc;
examine_ctx_put(ctx);
return;
}
rc = ocf_volume_open(ctx->volume, &ctx->base);
if (rc) {
ctx->result = rc;
examine_ctx_put(ctx);
return;
}
/* Save the thread where the base device is opened */
ctx->base.thread = spdk_get_thread();
ocf_metadata_probe(vbdev_ocf_ctx, ctx->volume, metadata_probe_cb, ctx);
}
static int
vbdev_ocf_get_ctx_size(void)
{
return sizeof(struct bdev_ocf_data);
}
static void
fini_start(void)
{
g_fini_started = true;
}
/* Module-global function table
* Does not relate to vbdev instances */
static struct spdk_bdev_module ocf_if = {
.name = "ocf",
.module_init = vbdev_ocf_init,
.fini_start = fini_start,
.module_fini = vbdev_ocf_module_fini,
.get_ctx_size = vbdev_ocf_get_ctx_size,
.examine_config = vbdev_ocf_examine,
.examine_disk = vbdev_ocf_examine_disk,
};
SPDK_BDEV_MODULE_REGISTER(ocf, &ocf_if);