doc: describe ublk target in user guide
Signed-off-by: Liu Xiaodong <xiaodong.liu@intel.com> Change-Id: I0de47e21a34d7766c4addd6f751098b03d8a4a9e Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/16245 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
This commit is contained in:
parent
6c0da14649
commit
651c558d0e
@ -847,6 +847,7 @@ INPUT += \
|
||||
spdk_top.md \
|
||||
ssd_internals.md \
|
||||
system_configuration.md \
|
||||
ublk.md \
|
||||
usdt.md \
|
||||
userspace.md \
|
||||
vagrant.md \
|
||||
|
41
doc/img/ublk_service.svg
Normal file
41
doc/img/ublk_service.svg
Normal file
@ -0,0 +1,41 @@
|
||||
<?xml version="1.0"?>
|
||||
<svg width="680" height="420" xmlns="http://www.w3.org/2000/svg" xmlns:svg="http://www.w3.org/2000/svg">
|
||||
<!-- Created with SVG-edit - https://github.com/SVG-Edit/svgedit-->
|
||||
<g class="layer">
|
||||
<title>Layer 1</title>
|
||||
<rect fill="#ffffff" height="369" id="svg_1" stroke="#000000" width="635.87" x="22.74" y="26.61"/>
|
||||
<rect fill="#aaffff" height="0" id="svg_2" stroke="#000000" width="0" x="191.24" y="101.36">Application A</rect>
|
||||
<rect fill="#aaffff" height="88.96" id="svg_3" stroke="#000000" width="171" x="400.9" y="67.61">ublk Server</rect>
|
||||
<line fill="none" id="svg_4" stroke="#000000" stroke-dasharray="5,5" stroke-width="2" x1="23.11" x2="660.11" y1="199.03" y2="198.03">ublk Server</line>
|
||||
<text fill="#000000" font-family="Serif" font-size="21" font-weight="bold" id="svg_5" stroke="#000000" stroke-width="0" text-anchor="middle" transform="matrix(1 0 0 1 0 0)" x="488.28" xml:space="preserve" y="122.24">ublk Server</text>
|
||||
<rect fill="#aaffff" height="62" id="svg_6" stroke="#000000" transform="matrix(1 0 0 1 0 0)" width="161" x="384.38" y="311.2"/>
|
||||
<text fill="#000000" font-family="Serif" font-size="21" font-weight="bold" id="svg_7" stroke="#000000" stroke-width="0" text-anchor="middle" transform="matrix(1 0 0 1 0 0)" x="468.93" xml:space="preserve" y="349.7">ublk Driver</text>
|
||||
<rect fill="#ffff00" height="32" id="svg_8" stroke="#000000" width="98" x="144.36" y="212.94"/>
|
||||
<text fill="#000000" font-family="Serif" font-size="18" id="svg_9" stroke="#000000" stroke-width="0" text-anchor="middle" x="194.36" xml:space="preserve" y="235.94">/dev/ublkb3</text>
|
||||
<rect fill="#ffffff" height="0" id="svg_10" stroke="#000000" width="0" x="175.36" y="246.94"/>
|
||||
<rect fill="#ffff00" height="33" id="svg_11" stroke="#000000" width="97" x="200.03" y="239.6"/>
|
||||
<text fill="#000000" font-family="Serif" font-size="18" id="svg_12" stroke="#000000" stroke-width="0" text-anchor="middle" x="249.36" xml:space="preserve" y="263.27">/dev/ublkb2</text>
|
||||
<rect fill="#ffffff" height="0" id="svg_13" stroke="#000000" width="0" x="174.36" y="264.94"/>
|
||||
<rect fill="#ffff00" height="33" id="svg_14" stroke="#000000" width="97" x="33.99" y="244.06"/>
|
||||
<text fill="#000000" font-family="Serif" font-size="18" id="svg_15" stroke="#000000" stroke-width="0" text-anchor="middle" x="82.99" xml:space="preserve" y="267.06">/dev/ublkb1</text>
|
||||
<rect fill="#00ff00" height="32" id="svg_16" stroke="#000000" width="93" x="35.99" y="206.31">le/dev/ublkb1</rect>
|
||||
<text fill="#000000" font-family="Serif" font-size="18" id="svg_17" stroke="#000000" stroke-width="0" text-anchor="middle" x="80.99" xml:space="preserve" y="226.31">Filesystem</text>
|
||||
<path d="m383.94,359.38l-298.65,-1.66c0,0 -1.68,-79.96 -1.68,-79.96" fill="none" id="svg_22" stroke="#000000" stroke-linejoin="bevel" stroke-width="4"/>
|
||||
<path d="m384.83,334.28l-148.14,-0.2c0,0 3.33,-62.12 3.33,-62.12" fill="none" id="svg_26" stroke="#000000" stroke-linejoin="bevel" stroke-width="4" transform="matrix(1 0 0 1 0 0)"/>
|
||||
<path d="m384.69,347.33l-201.99,-0.22l0,-102.04" fill="none" id="svg_27" stroke="#000000" stroke-linejoin="bevel" stroke-width="4" transform="matrix(1 0 0 1 0 0)"/>
|
||||
<path d="m454.33,155.75c0,0 0.48,154.94 0.32,154.69c-0.16,-0.25 -0.32,-154.69 -0.32,-154.69z" fill="none" id="svg_28" stroke="#000000" stroke-linejoin="bevel" stroke-width="3"/>
|
||||
<path d="m468.6,156.42l0.18,155.99l-0.18,-155.99z" fill="none" id="svg_29" stroke="#000000" stroke-linejoin="bevel" stroke-width="3"/>
|
||||
<path d="m482.69,157.08l-0.32,154.03l0.32,-154.03z" fill="none" id="svg_30" stroke="#000000" stroke-linecap="square" stroke-linejoin="bevel" stroke-width="3">ublk Server</path>
|
||||
<rect fill="#aaffff" height="35.63" id="svg_40" stroke="#000000" width="109.37" x="65.74" y="91.86">Application A</rect>
|
||||
<text fill="#000000" font-family="Serif" font-size="18" id="svg_41" stroke="#000000" stroke-width="0" style="cursor: move;" text-anchor="middle" x="119.36" xml:space="preserve" y="112.19">Application D</text>
|
||||
<rect fill="#aaffff" height="30.63" id="svg_42" stroke="#000000" width="109.37" x="89.49" y="115.61">Application A</rect>
|
||||
<text fill="#000000" font-family="Serif" font-size="18" id="svg_43" stroke="#000000" stroke-width="0" text-anchor="middle" x="143.11" xml:space="preserve" y="136.56">Application C</text>
|
||||
<rect fill="#aaffff" height="31.25" id="svg_44" stroke="#000000" width="109.37" x="114.49" y="139.99">Application A</rect>
|
||||
<text fill="#000000" font-family="Serif" font-size="18" id="svg_45" stroke="#000000" stroke-width="0" style="cursor: move;" text-anchor="middle" x="169.36" xml:space="preserve" y="160.31">Application B</text>
|
||||
<rect fill="#aaffff" height="30.63" id="svg_46" stroke="#000000" width="109.37" x="145.74" y="164.99">Application A</rect>
|
||||
<text fill="#000000" font-family="Serif" font-size="18" id="svg_47" stroke="#000000" stroke-width="0" text-anchor="middle" x="201.24" xml:space="preserve" y="186.56">Application A</text>
|
||||
<text fill="#000000" font-family="Serif" font-size="21" font-weight="bold" id="svg_50" stroke="#000000" stroke-width="0" text-anchor="middle" transform="matrix(1 0 0 1 0 0)" x="161.4" xml:space="preserve" y="82.24">ublk Workload</text>
|
||||
<text fill="#000000" font-family="Serif" font-size="19" font-style="italic" font-weight="normal" id="svg_51" stroke="#000000" stroke-width="0" text-anchor="middle" x="602.65" xml:space="preserve" y="222.24">Kernel Space</text>
|
||||
<text fill="#000000" font-family="Serif" font-size="19" font-style="italic" font-weight="normal" id="svg_52" stroke="#000000" stroke-width="0" text-anchor="middle" transform="matrix(1 0 0 1 0 0)" x="602.03" xml:space="preserve" y="188.49">Userspace</text>
|
||||
</g>
|
||||
</svg>
|
After Width: | Height: | Size: 5.6 KiB |
217
doc/ublk.md
Normal file
217
doc/ublk.md
Normal file
@ -0,0 +1,217 @@
|
||||
# ublk Target {#ublk}
|
||||
|
||||
## Table of Contents {#ublk_toc}
|
||||
|
||||
- @ref ublk_intro
|
||||
- @ref ublk_internal
|
||||
- @ref ublk_impl
|
||||
- @ref ublk_op
|
||||
|
||||
## Introduction {#ublk_intro}
|
||||
|
||||
[ublk](https://docs.kernel.org/block/ublk.html) (or ubd) is a generic framework for
|
||||
implementing generic userspace block device based on `io_uring`. It is designed to
|
||||
create a highly efficient data path for userspace storage software to provide
|
||||
high-performance block device service in local host.
|
||||
|
||||
The whole ublk service involves three parts: ublk driver, ublk server and ublk workload.
|
||||
|
||||

|
||||
|
||||
* __ublk driver__ is a kernel driver added to kernel 6.0. It delivers I/O requests
|
||||
from a ublk block device(`/dev/ublkbN`) into a ublk server.
|
||||
|
||||
* __ublk workload__ can be any local host process which submits I/O requests to a ublk
|
||||
block device or a kernel filesystem on top of the ublk block device.
|
||||
|
||||
* __ublk server__ is the userspace storage software that fetches the I/O requests delivered
|
||||
by the ublk driver. The ublk server will process the I/O requests with its specific block
|
||||
service logic and connected backends. Once the ublk server gets the response from the
|
||||
connected backends, it communicates with the ublk driver and completes the I/O requests.
|
||||
|
||||
SPDK ublk target acts as a ublk server. It can handle ublk I/O requests within the whole
|
||||
SPDK userspace storage software stack.
|
||||
|
||||
A typical usage scenario is for container attached storage:
|
||||
|
||||
* Real storage resources are assigned to SPDK, like physical NVMe devices and
|
||||
distributed block storage.
|
||||
* SPDK creates refined block devices via ublk kernel module on top of its organized
|
||||
storage resources, based on user configuration.
|
||||
* Container orchestrator and runtime can then mount and stage the ublk block devices
|
||||
for container instances to use.
|
||||
|
||||
## ublk Internal {#ublk_internal}
|
||||
|
||||
Previously, the design of putting I/O processing logic into userspace software always has a
|
||||
noticeable interaction overhead between the kernel module and userspace part.
|
||||
|
||||
ublk utilizes `io_uring` which has been proven to be very efficient in decreasing the
|
||||
interaction overhead. The I/O request is delivered to the userspace ublk server via the
|
||||
newly added `io_uring` command. A shared buffer via `mmap` is used for sharing I/O descriptor
|
||||
to userspace from the kernel driver. The I/O data is copied only once between the specified
|
||||
userspace buffer address and request/bio's pages by the ublk driver.
|
||||
|
||||
### Control Plane
|
||||
|
||||
A control device is create by ublk kernel module at `/dev/ublk-control`. Userspace server
|
||||
sends control commands to kernel module via the control device using `io_uring`.
|
||||
|
||||
Control commands includes add, configure, and start new ublk block device.
|
||||
Retrieving device information, stop and delete existing ublk block device are also there.
|
||||
|
||||
The add device command creates a bulk char device `/dev/ublkcN`.
|
||||
It will be used by the ublk userspace server to `mmap` I/O descriptor buffer.
|
||||
The start device command exposes a ublk block device `/dev/ublkbN`.
|
||||
The block device can be formatted and mounted by a kernel filesystem,
|
||||
or read/written directly by other processes.
|
||||
|
||||
### Data Plane
|
||||
|
||||
The datapath between ublk server and kernel driver includes `io_uring` and shared
|
||||
memory buffer. The shared memory buffer is an array of I/O descriptors.
|
||||
Each SQE (Submission Queue Entry) in `io_uring` is assigned one I/O descriptor and
|
||||
one user buffer address. When ublk kernel driver receives I/O requests from upper
|
||||
layer, the information of I/O requests will be filled into I/O descriptors by ublk
|
||||
kernel driver. The I/O data is copied between the specified user buffer address and
|
||||
request/bio's pages at the proper time.
|
||||
|
||||
At start, the ublk server needs to fill the `io_uring` SQ (Submission Queue). Each
|
||||
SQE is marked with an operation flag `UBLK_IO_FETCH_REQ` which means the SQE is
|
||||
ready to get I/O request.
|
||||
|
||||
When a CQE (Completion Queue Entry) is returned from the `io_uring` indicating I/O
|
||||
request, the ublk server gets the position of the I/O descriptor from CQE.
|
||||
The ublk server handles the I/O request based on information in the I/O descriptor.
|
||||
|
||||
After the ublk server completes the I/O request, it updates the I/O's completion status
|
||||
and ublk operation flag. This time, the operation flag is `UBLK_IO_COMMIT_AND_FETCH_REQ`
|
||||
which informs kernel module that one I/O request is completed, and also the SQE slot
|
||||
is free to fetch new I/O request.
|
||||
|
||||
`UBLK_IO_COMMIT_AND_FETCH_REQ` is designed for efficiency in ublk. In runtime, the ublk
|
||||
server needs to commit I/O results back, and then provide new free SQE slots for fetching
|
||||
new I/O requests. Without `UBLK_IO_COMMIT_AND_FETCH_REQ` flag, `io_uring_submit()` should
|
||||
be called twice, once for committing I/O results back, once for providing free SQE slots.
|
||||
With `UBLK_IO_COMMIT_AND_FETCH_REQ` flag, calling `io_uring_submit()` once is enough because
|
||||
the ublk driver realizes that the submitted SQEs are reused both for committing back I/O
|
||||
results and fetching new requests.
|
||||
|
||||
## SPDK Implementation {#ublk_impl}
|
||||
|
||||
SPDK ublk target is implemented as a high performance ublk server.
|
||||
|
||||
It creates one ublk spdk_thread on each spdk_reactor by default or on user specified
|
||||
reactors. When adding a new ublk block device, SPDK ublk target will assign queues
|
||||
of ublk block device to ublk spdk_threads in round-robin.
|
||||
That means one ublk device queue will only be processed by one spdk_thread.
|
||||
One ublk device with multiple queues can get multiple spdk reactors involved
|
||||
to process its I/O requests;
|
||||
One spdk_thread created by ublk target may process multiple queues, each from
|
||||
different ublk devices.
|
||||
In this way, spdk reactors can be fully utilized to achieve best performance,
|
||||
when there are only a few ublk devices.
|
||||
|
||||
ublk is `io_uring` based. All ublk I/O queues are mapped to `io_uring`.
|
||||
ublk spdk_thread gets I/O requests from available CQEs by polling all its assigned
|
||||
`io_uring`s.
|
||||
When there are completed I/O requests, ublk spdk_thread will submit them as SQE back
|
||||
to `io_uring` in batch.
|
||||
|
||||
Currently, ublk driver has a system thread context limitation that one ublk device queue
|
||||
can be only processed in the context of system thread which initialized the it. SPDK
|
||||
can't schedule ublk spdk_thread between different SPDK reactors. In other words, SPDK
|
||||
dynamic scheduler can't rebalance ublk workload by rescheduling ublk spdk_thread.
|
||||
|
||||
## Operation {#ublk_op}
|
||||
|
||||
### Enabling SPDK ublk target
|
||||
|
||||
Build SPDK with SPDK ublk target enabled.
|
||||
|
||||
~~~{.sh}
|
||||
./configure --with-ublk
|
||||
make -j
|
||||
~~~
|
||||
|
||||
SPDK ublk target related libaries will then be linked into SPDK appliation `spdk_tgt`.
|
||||
Setup some hugepages for the SPDK, and then run the SPDK application `spdk_tgt`.
|
||||
|
||||
~~~{.sh}
|
||||
scripts/setup.sh
|
||||
build/bin/spdk_tgt &
|
||||
~~~
|
||||
|
||||
Once the `spdk_tgt` is initialized, user can enable SPDK ublk feature
|
||||
by creating ublk target. However, before creating ublk target, ublk kernel module
|
||||
`ublk_drv` should be loaded using `modprobe`.
|
||||
|
||||
~~~{.sh}
|
||||
modprobe ublk_drv
|
||||
scripts/rpc.py ublk_create_target
|
||||
~~~
|
||||
|
||||
### Creating ublk block device
|
||||
|
||||
SPDK bdevs are block devices which will be exposed to the local host kernel
|
||||
as ublk block devices. SPDK supports several different types of storage backends,
|
||||
including NVMe, Linux AIO, malloc ramdisk and Ceph RBD. Refer to @ref bdev for
|
||||
additional information on configuring SPDK storage backends.
|
||||
|
||||
This guide will use a malloc bdev (ramdisk) named Malloc0. The following RPC
|
||||
will create a 256MB malloc bdev with 512-byte block size.
|
||||
|
||||
~~~{.sh}
|
||||
scripts/rpc.py bdev_malloc_create 256 512 -b Malloc0
|
||||
~~~
|
||||
|
||||
The following RPC will create a ublk block device exposing Malloc0 bdev.
|
||||
The created ublk block device has ID 1. It internally has 2 queues with
|
||||
queue depth 128.
|
||||
|
||||
~~~{.sh}
|
||||
scripts/rpc.py ublk_start_disk Malloc0 1 -q 2 -d 128
|
||||
~~~
|
||||
|
||||
This RPC will reply back the ID of ublk block device.
|
||||
~~~
|
||||
1
|
||||
~~~
|
||||
|
||||
The position of ublk block device is determined by its ID. It is created at `/dev/ublkb${ID}`.
|
||||
So the device we just created will be accessible to other processes via `/dev/ublkb1`.
|
||||
Now applications like FIO or DD can work on `/dev/ublkb1` directly.
|
||||
|
||||
~~~{.sh}
|
||||
dd of=/dev/ublkb1 if=/dev/zero bs=512 count=64
|
||||
~~~
|
||||
|
||||
A ublk block device is a generic kernel block device that can be formatted and
|
||||
mounted by kernel file system.
|
||||
|
||||
~~~{.sh}
|
||||
mkfs /dev/ublkb1
|
||||
mount /dev/ublkb1 /mnt/
|
||||
mkdir /mnt/testdir
|
||||
echo "Hello,SPDK ublk Target" > /mnt/testdir/testfile
|
||||
umount /mnt
|
||||
~~~
|
||||
|
||||
### Deleting ublk block device and exit
|
||||
|
||||
After usage, ublk block device can be stopped and deleted by RPC `ublk_stop_disk` with its ID.
|
||||
Specify ID 1, then device `/dev/ublkb1` will be removed.
|
||||
|
||||
~~~{.sh}
|
||||
scripts/rpc.py ublk_stop_disk 1
|
||||
~~~
|
||||
|
||||
If ublk is not used anymore, SPDK ublk target can be destroyed to free related SPDK
|
||||
resources.
|
||||
|
||||
~~~{.sh}
|
||||
scripts/rpc.py ublk_destroy_target
|
||||
~~~
|
||||
|
||||
Of course, SPDK ublk target and all ublk block devices would be destroyed automatically
|
||||
when SPDK application is terminated.
|
@ -14,3 +14,4 @@
|
||||
- @subpage usdt
|
||||
- @subpage nvme_multipath
|
||||
- @subpage sma
|
||||
- @subpage ublk
|
||||
|
Loading…
Reference in New Issue
Block a user