rxe_cfg: Drop librxe and port minimal rxe_cfg functionality to Bash

Since rdma-core now handles the entire libibverbs suite, and because it
dropped rxe_cfg in favor of iproute2's rdma tool, building librxe is
no longer needed. That said, the rxe_cfg functionality that our test
suites rely on is basic and useful enough to be preserved in the form
of a simple Bash script. This avoids a full overhaul of the code, which
would otherwise need to be adjusted for iproute2's tooling. If more
complex, link-level rdma configuration is needed later, the iproute2
dependency can be added then.

Additionally, some of the nvmf functions have been simplified to make
use of the rxe_cfg port.

The formatting of the status cmd is left compatible with the rxe_cfg.

Change-Id: I594a24b73472a16d51401bcd74fd30c415b24ddb
Signed-off-by: Michal Berger <michalx.berger@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/1457
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI
Community-CI: Mellanox Build Bot
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
Reviewed-by: Maciej Wawryk <maciejx.wawryk@intel.com>
Reviewed-by: Karol Latecki <karol.latecki@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
This commit is contained in:
Michal Berger 2020-03-25 13:00:39 +01:00 committed by Tomasz Zawadzki
parent d3030f6b95
commit cba9514dec
4 changed files with 273 additions and 64 deletions

265
scripts/rxe_cfg_small.sh Executable file
View File

@ -0,0 +1,265 @@
#!/usr/bin/env bash
# Everything below works on Linux sysfs paths, so on any other platform the
# script is a successful no-op.
if [[ $(uname -s) != Linux ]]; then
	exit 0
fi

# extglob enables the +(...) and !(...) patterns used by the functions below;
# nullglob makes a glob that matches nothing expand to an empty list.
shopt -s extglob
shopt -s nullglob

# Read-only sysfs locations shared by all functions.
readonly rdma_rxe=/sys/module/rdma_rxe
readonly rdma_rxe_add=$rdma_rxe/parameters/add
readonly rdma_rxe_rm=$rdma_rxe/parameters/remove
readonly infiniband=/sys/class/infiniband
readonly net=/sys/class/net
# Print the value of one KEY=value entry of a sysfs "uevent" file.
#   $1 - directory expected to contain a file named "uevent"
#   $2 - key to look up
#   $3 - optional fallback, printed when the key is not present in the file
# Prints nothing and returns 0 when "$1/uevent" does not exist.
# The file is parsed line by line rather than sourced, so its content is
# never executed as shell code and values containing spaces are returned
# intact. When a key occurs more than once the last occurrence wins.
uevent() (
	[[ -e $1/uevent ]] || return 0

	local line found=0 value=""
	# "|| [[ -n $line ]]" keeps a final line that lacks a trailing newline.
	while IFS= read -r line || [[ -n $line ]]; do
		[[ $line == *=* ]] || continue
		if [[ -n $2 && ${line%%=*} == "$2" ]]; then
			found=1
			value=${line#*=}
		fi
	done < "$1/uevent"

	if ((found)); then
		printf '%s\n' "$value"
	elif [[ -n $3 ]]; then
		printf '%s\n' "$3"
	fi
)
# Run modprobe with -q prepended to the caller's arguments; the exit status
# is modprobe's.
modprobeq() {
	local -a argv=(-q "$@")

	modprobe "${argv[@]}"
}
# Print the first IPv4 address reported by `ip -o -4 addr show dev $1`,
# without its /prefix suffix. Prints a single space when there is none.
#   $1 - network interface name
get_ipv4() {
	local -a words=()
	local addr

	# Only the first output line is read; its 4th whitespace-separated word
	# is taken as the address.
	read -r -a words < <(ip -o -4 addr show dev "$1")
	addr=${words[3]}

	if [[ -z $addr ]]; then
		echo " "
		return
	fi
	echo "${addr%/*}"
}
# Print the active MTU of an rxe device as found in the "active_mtu:" line
# of `ibv_devinfo -d <dev>`, e.g. "1024 (3)"; prints "(?)" when that line
# cannot be found.
#   $1 - rxe device name (rxeN)
# Prints nothing and returns 0 when /dev/infiniband/uverbsN (N being the
# name with "rxe" removed) is not a character device.
get_rxe_mtu() {
	local rxe=$1
	local devinfo
	# Skip the blanks/tabs following the colon and never match past the end
	# of the line, so the captured value carries no leading padding and
	# cannot swallow later lines of the output.
	local mtu_re=$'active_mtu:[ \t]*([^ \t\n][^\n]* \\([^\n]*\\))'

	[[ -c /dev/infiniband/uverbs${rxe/rxe/} ]] || return 0

	devinfo=$(ibv_devinfo -d "$rxe")
	if [[ $devinfo =~ $mtu_re ]]; then
		echo "${BASH_REMATCH[1]}"
	else
		echo "(?)"
	fi
}
# Load the kernel modules needed for rxe and then add an rxe device on top
# of every eligible interface (add_rxe all).
# Returns 0 without loading anything when a module that is not loaded yet
# has no module file according to modinfo; returns 1 when modprobe fails.
start() {
	local -a required=(ib_core ib_uverbs rdma_ucm rdma_rxe)
	local mod mod_file

	for mod in "${required[@]}"; do
		if [[ -e /sys/module/$mod ]]; then
			# Already loaded (or otherwise present in sysfs).
			continue
		fi
		mod_file=$(modinfo -F filename "$mod")
		if [[ ! -e $mod_file ]]; then
			return 0
		fi
	done 2> /dev/null

	if ! modprobeq -a "${required[@]}"; then
		return 1
	fi
	add_rxe all
}
# Remove every rxeN device listed under $infiniband and try to unload the
# rdma_rxe module. A message is printed when modprobe -r fails or the
# module directory is still present afterwards.
stop() {
	local rxe_path

	for rxe_path in "$infiniband/rxe"+([0-9]); do
		remove_rxe "${rxe_path##*/}"
	done

	if modprobeq -r rdma_rxe && [[ ! -e $rdma_rxe ]]; then
		return 0
	fi
	printf 'unable to unload drivers, reboot required\n'
}
# Queue the column titles of the status table as its first row.
status_header() {
	size_print_fields \
		"Name" "Link" "Driver" "Speed" \
		"NMTU" "IPv4_addr" "RDEV" "RMTU"
}
# Build and print the status table: one row per entry of $net whose "type"
# file holds 1, with the columns announced by status_header:
#   Name, Link, Driver, Speed, NMTU, IPv4_addr, RDEV, RMTU
# - Link is "yes" when the "carrier" file reads 1 and "no" otherwise
#   (including when the file cannot be read).
# - Driver is the basename of the device/driver link; for devices listed
#   under /sys/devices/virtual/net the DEVTYPE from uevent is used instead,
#   defaulting to "virtual".
# - Speed is filled in only when the link is up: exactly 10000 is shown as
#   "10GigE", any other positive value as "<speed>Mb/s".
# - RDEV/RMTU describe the rxeN device whose "parent" file names this
#   interface, if there is one.
# A note is written to stderr when the rdma_rxe module directory is missing;
# the table is printed regardless.
status() {
	if [[ ! -e $rdma_rxe ]]; then
		printf 'rdma_rxe module not loaded\n' >&2
	fi

	local dev rxe
	local name link carrier driver speed mtu ip rxe_dev active_mtu

	status_header

	for dev in "$net"/*; do
		name=${dev##*/}
		[[ $name == bonding_masters ]] && continue
		(($(< "$dev/type") == 1)) || continue

		link="" carrier="" driver="" speed=""
		mtu="" ip="" rxe_dev="" active_mtu=""

		for rxe in "$infiniband"/rxe*; do
			# Only rxe<digits> entries are rxe devices.
			[[ ${rxe##*/} =~ ^rxe[0-9]+$ ]] || continue
			if [[ $(< "$rxe/parent") == "$name" ]]; then
				rxe_dev=${rxe##*/}
				active_mtu=$(get_rxe_mtu "$rxe_dev")
				break
			fi
		done

		# A failed read leaves carrier empty, which is reported as "no".
		read -r carrier < "$dev/carrier"
		if [[ $carrier == 1 ]]; then
			link=yes
		else
			link=no
		fi

		if [[ -e $dev/device/driver ]]; then
			driver=$(readlink -f "$dev/device/driver")
			driver=${driver##*/}
		elif [[ -e /sys/devices/virtual/net/$name ]]; then
			# Try to be smart and get the type of the device instead
			driver=$(uevent "$dev" "DEVTYPE" "virtual")
		fi

		if [[ $link == yes ]]; then
			speed=$(< "$dev/speed")
			if ((speed == 10000)); then
				speed=10GigE
			elif ((speed > 0)); then
				speed=${speed}Mb/s
			else
				speed=""
			fi
		fi

		mtu=$(< "$dev/mtu")
		ip=$(get_ipv4 "$name")

		size_print_fields \
			"$name" \
			"$link" \
			"$driver" \
			"$speed" \
			"$mtu" \
			"$ip" \
			"$rxe_dev" \
			"$active_mtu"
	done 2> /dev/null

	print_status
}
# Queue one table row for print_status and update the column widths.
#   $@ - the cells of the row, in column order
# Globals written:
#   lengths   - per-column maximum cell length seen so far
#   lines     - list of "<array name>[@]" references, one per queued row
#   lineno    - counter used to build the per-row array name
#   _line_<lineno> - the row's cells; empty cells are stored as "###"
size_print_fields() {
	local cells=("$@") idx width
	local -g lengths lines lineno
	local row=_line_$lineno

	# Start from a clean row array before filling it element by element.
	unset -v "$row"
	for idx in "${!cells[@]}"; do
		[[ -n ${cells[idx]} ]] || cells[idx]="###"
		width=${#cells[idx]}
		if [[ -z ${lengths[idx]} ]] || ((width > lengths[idx])); then
			lengths[idx]=$width
		fi
		printf -v "${row}[$idx]" '%s' "${cells[idx]}"
	done

	lines+=("${row}[@]")
	lineno=$((lineno + 1))
}
# Print every queued row of the status table.
# Globals read:
#   lines   - list of "<array name>[@]" references, one per row
#   lengths - width of the widest cell of each column
# Each row starts with one space; every column then takes exactly
# lengths[column] + 2 characters. A cell that is empty or holds the "###"
# placeholder (used by size_print_fields for empty cells) is rendered as
# blanks of that same width, so the following columns stay aligned.
print_status() {
	local row_ref cell col pad

	for row_ref in "${lines[@]}"; do
		printf ' '
		col=0
		for cell in "${!row_ref}"; do
			if [[ $cell == "###" ]]; then
				cell=""
			fi
			pad=$((lengths[col] - ${#cell} + 2))
			printf '%s' "$cell"
			printf '%*s' "$pad" ""
			((++col))
		done
		printf '\n'
	done
}
add_rxe() {
local dev net_devs
[[ -e $rdma_rxe/parameters ]] || return 1
if [[ -z $1 || $1 == all ]]; then
net_devs=("$net/"!(bonding_masters))
elif [[ -e $net/$1 ]]; then
net_devs=("$net/$1")
else
printf '%s interface does not exist\n' "$1"
return 1
fi
for dev in "${net_devs[@]}"; do
(($(< "$dev/type") != 1)) && continue
echo "${dev##*/}" > "$rdma_rxe_add"
done 2> /dev/null
}
# Ask rdma_rxe to remove a device by writing its name to the module's
# "remove" parameter.
#   $1 - device name or path; only the last path component is used
# Returns 1 without writing anything when the device is not listed under
# $infiniband.
remove_rxe() {
	local rxe_name=${1##*/}

	[[ -e $infiniband/$rxe_name ]] || return 1
	echo "$rxe_name" > "$rdma_rxe_rm"
}
# Call link_up on the parent interface of every rxeN device listed under
# $infiniband. Devices whose "parent" file is unreadable or empty are
# skipped.
link_up_rxes() {
	local rxe parent

	for rxe in "$infiniband"/rxe*; do
		# Only rxe<digits> entries are rxe devices.
		[[ ${rxe##*/} =~ ^rxe[0-9]+$ ]] || continue
		[[ -r $rxe/parent ]] || continue
		parent=$(< "$rxe/parent")
		[[ -n $parent ]] || continue
		link_up "$parent"
	done
}
# Set bit 0x1 in the "flags" file of a network interface.
#   $1 - interface name under $net
# Does nothing and returns 0 when the interface does not exist.
link_up() {
	local flags_file=$net/$1/flags

	if [[ ! -e $net/$1 ]]; then
		return 0
	fi
	# The current value is read during expansion, i.e. before the
	# redirection truncates the file.
	echo $(($(< "$flags_file") | 0x1)) > "$flags_file"
}
# Command dispatch. The first argument selects the action and defaults to
# "status":
#   start            - load modules and add rxe devices
#   stop             - remove rxe devices and unload rdma_rxe
#   add [dev|all]    - add an rxe device (default: all)
#   remove <rxe>     - remove the given rxe device
#   status [pat...]  - print the status table, keeping only lines that match
#                      any of the given patterns
# Any other argument is reported on stderr and the script exits with 1.
case "${1:-status}" in
	start)
		start
		;;
	stop)
		stop
		;;
	add)
		add_rxe "${2:-all}"
		;;
	remove)
		remove_rxe "$2"
		;;
	status)
		# Join the remaining arguments with "|" into one extended regex;
		# without any, "." keeps every non-empty line.
		IFS= read -r match < <(
			IFS="|"
			printf '%s\n' "${*:2}"
		)
		status | grep -E "${match:-.}"
		;;
	*)
		printf 'Invalid argument (%s)\n' "$1" >&2
		exit 1
		;;
esac

View File

@ -1,7 +1,6 @@
# This configuration file is provided for reference purposes.
GIT_REPO_SPDK=https://review.spdk.io/gerrit/spdk/spdk
GIT_REPO_DPDK=https://github.com/spdk/dpdk.git
GIT_REPO_LIBRXE=https://github.com/SoftRoCE/librxe-dev.git
GIT_REPO_OPEN_ISCSI=https://github.com/open-iscsi/open-iscsi
GIT_REPO_ROCKSDB=https://review.gerrithub.io/spdk/rocksdb
GIT_REPO_FIO=http://git.kernel.dk/fio.git

View File

@ -24,8 +24,7 @@ VM_SETUP_PATH=$(readlink -f ${BASH_SOURCE%/*})
UPGRADE=false
INSTALL=false
CONF="librxe,rocksdb,fio,flamegraph,tsocks,qemu,vpp,libiscsi,nvmecli,qat,refspdk"
LIBRXE_INSTALL=true
CONF="rocksdb,fio,flamegraph,tsocks,qemu,vpp,libiscsi,nvmecli,qat,refspdk"
gcc_version=$(gcc -dumpversion) gcc_version=${gcc_version%%.*}
if [ $(uname -s) == "FreeBSD" ]; then
@ -85,28 +84,6 @@ EOF
fi
}
function install_rxe_cfg() {
if echo $CONF | grep -q librxe; then
# rxe_cfg is used in the NVMe-oF tests
# The librxe-dev repository provides a command line tool called rxe_cfg which makes it
# very easy to use Soft-RoCE. The build pool utilizes this command line tool in the absence
# of any real RDMA NICs to simulate one for the NVMe-oF tests.
if hash rxe_cfg 2> /dev/null; then
echo "rxe_cfg is already installed. skipping"
else
if [ -d librxe-dev ]; then
echo "librxe-dev source already present, not cloning"
else
git clone "${GIT_REPO_LIBRXE}"
fi
./librxe-dev/configure --libdir=/usr/lib64/ --prefix=
make -C librxe-dev -j${jobs}
sudo make -C librxe-dev install
fi
fi
}
function install_qat() {
if [ "$PACKAGEMNG" = "dnf" ]; then
@ -422,8 +399,6 @@ GIT_VERSION=2.25.1
export GIT_REPO_SPDK
: ${GIT_REPO_DPDK=https://github.com/spdk/dpdk.git}
export GIT_REPO_DPDK
: ${GIT_REPO_LIBRXE=https://github.com/SoftRoCE/librxe-dev.git}
export GIT_REPO_LIBRXE
: ${GIT_REPO_ROCKSDB=https://review.spdk.io/spdk/rocksdb}
export GIT_REPO_ROCKSDB
: ${GIT_REPO_FIO=http://git.kernel.dk/fio.git}
@ -612,8 +587,6 @@ if $INSTALL; then
echo "Package rdma-core is avaliable at Ubuntu 18 [universe] repositorium" >&2
sudo apt-get install -y rdmacm-utils
sudo apt-get install -y ibverbs-utils
else
LIBRXE_INSTALL=false
fi
if ! sudo apt-get install -y libpmempool1; then
echo "Package libpmempool1 is available at Ubuntu 18 [universe] repositorium" >&2
@ -742,11 +715,6 @@ git -C spdk_repo/spdk submodule update --init --recursive
sudo mkdir -p /usr/src
if [ $OSID != 'freebsd' ]; then
if [ $LIBRXE_INSTALL = true ]; then
#Ubuntu18 integrates librxe to rdma-core, libibverbs-dev no longer ships infiniband/driver.h.
#Don't compile librxe on ubuntu18 or later version, install package rdma-core instead.
install_rxe_cfg &
fi
install_libiscsi &
install_vpp &
install_nvmecli &

View File

@ -20,6 +20,10 @@ build_nvmf_app_args
have_pci_nics=0
# Run scripts/rxe_cfg_small.sh from under $rootdir, passing all arguments
# through unchanged; the exit status is that of the script.
# NOTE(review): $rootdir is not set in this block - presumably provided by
# the sourcing test environment; confirm.
function rxe_cfg() {
"$rootdir/scripts/rxe_cfg_small.sh" "$@"
}
function load_ib_rdma_modules() {
if [ $(uname) != Linux ]; then
return 0
@ -37,18 +41,7 @@ function load_ib_rdma_modules() {
}
function detect_soft_roce_nics() {
if hash rxe_cfg; then
rxe_cfg start
rdma_nics=$(get_rdma_if_list)
all_nics=$(ip -o link | awk '{print $2}' | cut -d":" -f1)
non_rdma_nics=$(echo -e "$rdma_nics\n$all_nics" | sort | uniq -u)
for nic in $non_rdma_nics; do
if [[ -d /sys/class/net/${nic}/bridge ]]; then
continue
fi
rxe_cfg add $nic || true
done
fi
rxe_cfg start
}
# args 1 and 2 represent the grep filters for finding our NICS.
@ -220,27 +213,11 @@ function rdma_device_init() {
}
function revert_soft_roce() {
if hash rxe_cfg; then
interfaces="$(ip -o link | awk '{print $2}' | cut -d":" -f1)"
for interface in $interfaces; do
rxe_cfg remove $interface || true
done
rxe_cfg stop || true
fi
rxe_cfg stop
}
function check_ip_is_soft_roce() {
IP=$1
if hash rxe_cfg; then
dev=$(ip -4 -o addr show | grep $IP | cut -d" " -f2)
if (rxe_cfg status "rxe" | grep -q $dev); then
return 0
else
return 1
fi
else
return 1
fi
rxe_cfg status rxe | grep -wq "$1"
}
function nvme_connect() {