Environment check for SPDK support (#5880)
* Environment check for SPDK support Longhorn 5738 Longhorn 5380 Signed-off-by: Derek Su <derek.su@suse.com> Co-authored-by: David Ko <dko@suse.com>
This commit is contained in:
parent
d1c3f58399
commit
73a8bda8bd
@ -73,8 +73,7 @@ error() {
|
|||||||
######################################################
|
######################################################
|
||||||
# Check logics
|
# Check logics
|
||||||
######################################################
|
######################################################
|
||||||
set_packages_and_check_cmd()
|
set_packages_and_check_cmd() {
|
||||||
{
|
|
||||||
case $OS in
|
case $OS in
|
||||||
*"debian"* | *"ubuntu"* )
|
*"debian"* | *"ubuntu"* )
|
||||||
CHECK_CMD='dpkg -l | grep -w'
|
CHECK_CMD='dpkg -l | grep -w'
|
||||||
@ -105,16 +104,14 @@ set_packages_and_check_cmd()
|
|||||||
esac
|
esac
|
||||||
}
|
}
|
||||||
|
|
||||||
detect_node_kernel_release()
|
detect_node_kernel_release() {
|
||||||
{
|
|
||||||
local pod="$1"
|
local pod="$1"
|
||||||
|
|
||||||
KERNEL_RELEASE=$(kubectl exec -i $pod -- nsenter --mount=/proc/1/ns/mnt -- bash -c 'uname -r')
|
KERNEL_RELEASE=$(kubectl exec -i $pod -- nsenter --mount=/proc/1/ns/mnt -- bash -c 'uname -r')
|
||||||
echo "$KERNEL_RELEASE"
|
echo "$KERNEL_RELEASE"
|
||||||
}
|
}
|
||||||
|
|
||||||
detect_node_os()
|
detect_node_os() {
|
||||||
{
|
|
||||||
local pod="$1"
|
local pod="$1"
|
||||||
|
|
||||||
OS=$(kubectl exec -i $pod -- nsenter --mount=/proc/1/ns/mnt -- bash -c 'grep -E "^ID_LIKE=" /etc/os-release | cut -d= -f2')
|
OS=$(kubectl exec -i $pod -- nsenter --mount=/proc/1/ns/mnt -- bash -c 'grep -E "^ID_LIKE=" /etc/os-release | cut -d= -f2')
|
||||||
@ -130,13 +127,13 @@ check_local_dependencies() {
|
|||||||
local all_found=true
|
local all_found=true
|
||||||
for ((i=0; i<${#targets[@]}; i++)); do
|
for ((i=0; i<${#targets[@]}; i++)); do
|
||||||
local target=${targets[$i]}
|
local target=${targets[$i]}
|
||||||
if [ "$(which $target)" == "" ]; then
|
if [ "$(which $target)" = "" ]; then
|
||||||
all_found=false
|
all_found=false
|
||||||
error "Not found: $target"
|
error "Not found: $target"
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
if [ "$all_found" == "false" ]; then
|
if [ "$all_found" = "false" ]; then
|
||||||
msg="Please install missing dependencies: ${targets[@]}."
|
msg="Please install missing dependencies: ${targets[@]}."
|
||||||
info "$msg"
|
info "$msg"
|
||||||
exit 2
|
exit 2
|
||||||
@ -195,7 +192,7 @@ wait_ds_ready() {
|
|||||||
local numberReady=$(echo $ds | jq .status.numberReady)
|
local numberReady=$(echo $ds | jq .status.numberReady)
|
||||||
local desiredNumberScheduled=$(echo $ds | jq .status.desiredNumberScheduled)
|
local desiredNumberScheduled=$(echo $ds | jq .status.desiredNumberScheduled)
|
||||||
|
|
||||||
if [ "$desiredNumberScheduled" == "$numberReady" ] && [ "$desiredNumberScheduled" != "0" ]; then
|
if [ "$desiredNumberScheduled" = "$numberReady" ] && [ "$desiredNumberScheduled" != "0" ]; then
|
||||||
info "All longhorn-environment-check pods are ready ($numberReady/$desiredNumberScheduled)."
|
info "All longhorn-environment-check pods are ready ($numberReady/$desiredNumberScheduled)."
|
||||||
return
|
return
|
||||||
fi
|
fi
|
||||||
@ -224,47 +221,13 @@ check_mount_propagation() {
|
|||||||
done
|
done
|
||||||
|
|
||||||
if [ "$allSupported" != "true" ]; then
|
if [ "$allSupported" != "true" ]; then
|
||||||
error "MountPropagation is disabled on at least one node. As a result, CSI driver and Base image cannot be supported."
|
error "MountPropagation is disabled on at least one node. As a result, CSI driver and Base image cannot be supported"
|
||||||
exit 1
|
exit 1
|
||||||
else
|
else
|
||||||
info "MountPropagation is enabled."
|
info "MountPropagation is enabled"
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
check_package_installed() {
|
|
||||||
local pods=$(kubectl get pods -o name -l app=longhorn-environment-check)
|
|
||||||
|
|
||||||
local all_found=true
|
|
||||||
|
|
||||||
for pod in ${pods}; do
|
|
||||||
OS=$(detect_node_os $pod)
|
|
||||||
if [ x"$OS" == x"" ]; then
|
|
||||||
error "Unable to detect OS on node $node."
|
|
||||||
exit 2
|
|
||||||
fi
|
|
||||||
|
|
||||||
set_packages_and_check_cmd "$OS"
|
|
||||||
|
|
||||||
for ((i=0; i<${#PACKAGES[@]}; i++)); do
|
|
||||||
local package=${PACKAGES[$i]}
|
|
||||||
|
|
||||||
kubectl exec -i $pod -- nsenter --mount=/proc/1/ns/mnt -- timeout 30 bash -c "$CHECK_CMD $package" > /dev/null 2>&1
|
|
||||||
if [ $? != 0 ]; then
|
|
||||||
all_found=false
|
|
||||||
node=$(kubectl get ${pod} --no-headers -o=custom-columns=:.spec.nodeName)
|
|
||||||
error "$package is not found in $node."
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
done
|
|
||||||
|
|
||||||
if [ "$all_found" == "false" ]; then
|
|
||||||
error "Please install missing packages."
|
|
||||||
exit 2
|
|
||||||
fi
|
|
||||||
|
|
||||||
info "Required packages are installed."
|
|
||||||
}
|
|
||||||
|
|
||||||
check_hostname_uniqueness() {
|
check_hostname_uniqueness() {
|
||||||
hostnames=$(kubectl get nodes -o jsonpath='{.items[*].status.addresses[?(@.type=="Hostname")].address}')
|
hostnames=$(kubectl get nodes -o jsonpath='{.items[*].status.addresses[?(@.type=="Hostname")].address}')
|
||||||
|
|
||||||
@ -283,82 +246,199 @@ check_hostname_uniqueness() {
|
|||||||
info "Hostname uniqueness check is passed."
|
info "Hostname uniqueness check is passed."
|
||||||
}
|
}
|
||||||
|
|
||||||
check_multipathd() {
|
check_nodes() {
|
||||||
local pods=$(kubectl get pods -o name -l app=longhorn-environment-check)
|
local name=$1
|
||||||
local all_not_found=true
|
local callback=$2
|
||||||
|
shift
|
||||||
|
shift
|
||||||
|
|
||||||
|
info "Checking $name..."
|
||||||
|
|
||||||
|
local all_passed=true
|
||||||
|
|
||||||
|
local pods=$(kubectl get pods -o name -l app=longhorn-environment-check)
|
||||||
for pod in ${pods}; do
|
for pod in ${pods}; do
|
||||||
kubectl exec -t $pod -- nsenter --mount=/proc/1/ns/mnt -- bash -c "systemctl status --no-pager multipathd.service" > /dev/null 2>&1
|
eval "${callback} ${pod} $@"
|
||||||
if [ $? = 0 ]; then
|
if [ $? -ne 0 ]; then
|
||||||
all_not_found=false
|
all_passed=false
|
||||||
node=$(kubectl get ${pod} --no-headers -o=custom-columns=:.spec.nodeName)
|
|
||||||
warn "multipathd is running on $node."
|
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
if [ "$all_not_found" == "false" ]; then
|
if [ "$all_passed" = "false" ]; then
|
||||||
warn "multipathd would probably result in the Longhorn volume mount failure. Please refer to https://longhorn.io/kb/troubleshooting-volume-with-multipath for more information."
|
return 1
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
check_iscsid() {
|
check_iscsid() {
|
||||||
local pods=$(kubectl get pods -o name -l app=longhorn-environment-check)
|
local pod=$1
|
||||||
local all_found=true
|
|
||||||
|
|
||||||
for pod in ${pods}; do
|
kubectl exec -t ${pod} -- nsenter --mount=/proc/1/ns/mnt -- bash -c "systemctl status --no-pager iscsid.service" > /dev/null 2>&1
|
||||||
kubectl exec -t $pod -- nsenter --mount=/proc/1/ns/mnt -- bash -c "systemctl status --no-pager iscsid.service" > /dev/null 2>&1
|
if [ $? -ne 0 ]; then
|
||||||
|
kubectl exec -t ${pod} -- nsenter --mount=/proc/1/ns/mnt -- bash -c "systemctl status --no-pager iscsid.socket" > /dev/null 2>&1
|
||||||
if [ $? != 0 ]; then
|
if [ $? -ne 0 ]; then
|
||||||
all_found=false
|
|
||||||
node=$(kubectl get ${pod} --no-headers -o=custom-columns=:.spec.nodeName)
|
node=$(kubectl get ${pod} --no-headers -o=custom-columns=:.spec.nodeName)
|
||||||
error "iscsid is not running on $node."
|
error "Neither iscsid.service nor iscsid.socket is not running on ${node}"
|
||||||
|
return 1
|
||||||
fi
|
fi
|
||||||
done
|
|
||||||
|
|
||||||
if [ "$all_found" == "false" ]; then
|
|
||||||
exit 2
|
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
check_nfs_client_kernel_support() {
|
check_multipathd() {
|
||||||
local pods=$(kubectl get pods -o name -l app=longhorn-environment-check)
|
local pod=$1
|
||||||
local all_found=true
|
|
||||||
local nfs_client_kernel_configs=("CONFIG_NFS_V4_1" "CONFIG_NFS_V4_2")
|
|
||||||
|
|
||||||
for config in "${nfs_client_kernel_configs[@]}"; do
|
kubectl exec -t $pod -- nsenter --mount=/proc/1/ns/mnt -- bash -c "systemctl status --no-pager multipathd.service" > /dev/null 2>&1
|
||||||
declare -A nodes=()
|
if [ $? = 0 ]; then
|
||||||
|
node=$(kubectl get ${pod} --no-headers -o=custom-columns=:.spec.nodeName)
|
||||||
|
warn "multipathd is running on ${node}"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
for pod in ${pods}; do
|
check_packages() {
|
||||||
local kernel_release=$(detect_node_kernel_release $pod)
|
local pod=$1
|
||||||
if [ x"$kernel_release" == x"" ]; then
|
|
||||||
error "Unable to detect kernel release on node $node."
|
|
||||||
exit 2
|
|
||||||
fi
|
|
||||||
|
|
||||||
node=$(kubectl get ${pod} --no-headers -o=custom-columns=:.spec.nodeName)
|
OS=$(detect_node_os ${pod})
|
||||||
res=$(kubectl exec -t $pod -- nsenter --mount=/proc/1/ns/mnt -- bash -c "grep -E \"^# ${config} is not set\" /boot/config-${kernel_release}" > /dev/null 2>&1)
|
if [ x"$OS" = x"" ]; then
|
||||||
if [[ $? == 0 ]]; then
|
error "Failed to detect OS on node ${node}"
|
||||||
all_found=false
|
return 1
|
||||||
nodes["${node}"]="${node}"
|
fi
|
||||||
else
|
|
||||||
res=$(kubectl exec -t $pod -- nsenter --mount=/proc/1/ns/mnt -- bash -c "grep -E \"^${config}=\" /boot/config-${kernel_release}" > /dev/null 2>&1)
|
|
||||||
if [[ $? != 0 ]]; then
|
|
||||||
all_found=false
|
|
||||||
warn "Unable to check kernel config ${config} on node ${node}"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
if [ ${#nodes[@]} != 0 ]; then
|
set_packages_and_check_cmd
|
||||||
warn ""${config}" kernel config is not enabled on nodes ${nodes[*]}."
|
|
||||||
|
for ((i=0; i<${#PACKAGES[@]}; i++)); do
|
||||||
|
check_package ${PACKAGES[$i]}
|
||||||
|
if [ $? -ne 0 ]; then
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
#######################################
# Verify that a single package is installed on the node hosting a check pod.
# Globals:
#   CHECK_CMD (read) - package query pipeline chosen by
#                      set_packages_and_check_cmd
#   pod (read)       - fallback pod name inherited from the caller when no
#                      second argument is given
# Arguments:
#   $1 - package name
#   $2 - (optional) pod to run the query in; defaults to the caller's $pod so
#        existing one-argument call sites keep working
# Returns:
#   0 if the package query succeeds, 1 otherwise (an error is logged)
#######################################
check_package() {
  local package=$1
  # The original relied solely on bash dynamic scoping to see the caller's
  # $pod; accept it explicitly as well and refuse to run without one.
  local target_pod=${2:-${pod:-}}
  local node

  if [ -z "${target_pod}" ]; then
    error "Unable to check package ${package}: no pod specified"
    return 1
  fi

  # The query runs in the host mount namespace (PID 1) so that it sees the
  # node's package database; it is bounded to 30 seconds.
  if ! kubectl exec -i "${target_pod}" -- nsenter --mount=/proc/1/ns/mnt -- \
    timeout 30 bash -c "${CHECK_CMD} ${package}" > /dev/null 2>&1; then
    node=$(kubectl get "${target_pod}" --no-headers -o=custom-columns=:.spec.nodeName)
    error "$package is not found in $node."
    return 1
  fi
}
|
||||||
|
|
||||||
|
#######################################
# Check that the node hosting a check pod has NFS client support in its
# kernel. The check passes as soon as one of the candidate kernel config
# options is accepted by check_kernel_module (with module name "nfs").
# Arguments:
#   $1 - pod name (as printed by `kubectl get pods -o name`)
# Returns:
#   0 if at least one option is enabled, 1 otherwise or when the kernel
#   release / kernel config file cannot be determined
#######################################
check_nfs_client() {
  local pod=$1
  local node kernel option
  # Candidates are tried in this order; the first enabled one wins.
  local options=("CONFIG_NFS_V4_2" "CONFIG_NFS_V4_1" "CONFIG_NFS_V4")

  node=$(kubectl get "${pod}" --no-headers -o=custom-columns=:.spec.nodeName)

  kernel=$(detect_node_kernel_release "${pod}")
  if [ -z "${kernel}" ]; then
    warn "Failed to check NFS client installation, because unable to detect kernel release on node ${node}"
    return 1
  fi

  # The presence of the kernel config file does not depend on the option being
  # tested, so probe it once up front. Without the file nothing can be
  # verified; report that instead of claiming the options are disabled.
  if ! kubectl exec -t "${pod}" -- nsenter --mount=/proc/1/ns/mnt -- \
    bash -c "[ -f /boot/config-${kernel} ]" > /dev/null 2>&1; then
    warn "Failed to check NFS client installation, because /boot/config-${kernel} does not exist on node ${node}"
    return 1
  fi

  for option in "${options[@]}"; do
    if check_kernel_module "${pod}" "${option}" nfs; then
      return 0
    fi
  done

  error "At least one of the NFS client kernel options ${options[*]} should be enabled on node ${node}"
  return 1
}
|
||||||
|
|
||||||
|
#######################################
# Check one kernel config option on the node hosting a check pod and, when
# the option is set to "m", that the corresponding module is currently listed
# by lsmod.
# Arguments:
#   $1 - pod name (as printed by `kubectl get pods -o name`)
#   $2 - kernel config option, e.g. CONFIG_NVME_TCP
#   $3 - module name to look for in lsmod, e.g. nvme_tcp
# Returns:
#   0 if the option is "y", or "m" with the module loaded; 1 in every other
#   case (a warning or error is logged)
#######################################
check_kernel_module() {
  local pod=$1
  local option=$2
  local module=$3
  local node kernel value

  # Resolve the node name first: every message below mentions it, and the
  # original only assigned it in one late branch.
  node=$(kubectl get "${pod}" --no-headers -o=custom-columns=:.spec.nodeName)

  kernel=$(detect_node_kernel_release "${pod}")
  if [ -z "${kernel}" ]; then
    warn "Failed to check kernel config option ${option}, because unable to detect kernel release on node ${node}"
    return 1
  fi

  if ! kubectl exec -t "${pod}" -- nsenter --mount=/proc/1/ns/mnt -- \
    bash -c "[ -e /boot/config-${kernel} ]" > /dev/null 2>&1; then
    warn "Failed to check kernel config option ${option}, because /boot/config-${kernel} does not exist on node ${node}"
    return 1
  fi

  # No TTY is requested here because the output is captured. The pattern is
  # single-quoted inside the remote command instead of relying on nested
  # double quotes. A trailing carriage return, should the transport add one,
  # is stripped so the comparison below sees a bare value.
  value=$(kubectl exec "${pod}" -- nsenter --mount=/proc/1/ns/mnt -- \
    bash -c "grep '^${option}=' /boot/config-${kernel} | cut -d= -f2")
  value=${value%$'\r'}

  case "${value}" in
    "")
      error "Failed to find kernel config $option on node ${node}"
      return 1
      ;;
    y)
      return 0
      ;;
    m)
      # Match the module name as the whole first column of lsmod so that,
      # for example, "nfs" is not satisfied by an "nfsd" row.
      if ! kubectl exec -t "${pod}" -- nsenter --mount=/proc/1/ns/mnt -- \
        bash -c "lsmod | grep -q '^${module}[[:space:]]'" > /dev/null 2>&1; then
        error "kernel module ${module} is not enabled on ${node}"
        return 1
      fi
      return 0
      ;;
    *)
      warn "Unknown value for $option: $value"
      return 1
      ;;
  esac
}
|
||||||
|
|
||||||
|
#######################################
# Check that the node hosting a check pod has at least the expected number of
# hugepages, as reported by /proc/sys/vm/nr_hugepages.
# Arguments:
#   $1 - pod name (as printed by `kubectl get pods -o name`)
#   $2 - expected minimum number of hugepages; defaults to 1024, the same
#        default the command-line option documents
# Returns:
#   0 if the node reports at least the expected number, 1 otherwise or when
#   either number cannot be read as a non-negative integer
#######################################
check_hugepage() {
  local pod=$1
  local expected_nr_hugepages=${2:-1024}
  local node nr_hugepages

  # The messages below name the node; the original never assigned it.
  node=$(kubectl get "${pod}" --no-headers -o=custom-columns=:.spec.nodeName)

  # A non-numeric operand makes `[ ... -lt ... ]` fail with an error, which
  # the original treated the same as "enough hugepages". Reject it up front.
  case "${expected_nr_hugepages}" in
    *[!0-9]*)
      error "Invalid expected number of hugepages: ${expected_nr_hugepages}"
      return 1
      ;;
  esac

  if ! nr_hugepages=$(kubectl exec -i "${pod}" -- nsenter --mount=/proc/1/ns/mnt -- \
    bash -c 'cat /proc/sys/vm/nr_hugepages'); then
    error "Failed to check hugepage size on node ${node}"
    return 1
  fi
  nr_hugepages=${nr_hugepages%$'\r'}

  # Empty or garbled output is a failed check, not a pass.
  case "${nr_hugepages}" in
    '' | *[!0-9]*)
      error "Failed to check hugepage size on node ${node}"
      return 1
      ;;
  esac

  if [ "${nr_hugepages}" -lt "${expected_nr_hugepages}" ]; then
    error "Hugepage size is not enough on node ${node}. Expected: ${expected_nr_hugepages}, Actual: ${nr_hugepages}"
    return 1
  fi
}
|
||||||
|
|
||||||
|
#######################################
# Print the command-line usage summary to stdout and terminate the script.
# Arguments:
#   None
# Outputs:
#   Usage text on stdout
# Returns:
#   Does not return; exits with status 0
#######################################
show_help() {
  cat <<EOF
Usage: $0 [OPTIONS]

Options:
  -s, --enable-spdk                  Enable checking SPDK prerequisites
  -p, --expected-nr-hugepages NUM    Expected number of hugepages for SPDK. Default: 1024
  -h, --help                         Show this help message and exit
EOF
  exit 0
}
|
||||||
|
|
||||||
|
# Defaults for the command-line options; parse_options overrides them.
enable_spdk=false
expected_nr_hugepages=1024

#######################################
# Parse the script's command-line arguments.
# Globals:
#   enable_spdk (written)              - "true" when -s/--enable-spdk is given
#   expected_nr_hugepages (written)    - value of -p/--expected-nr-hugepages
#   instance_manager_options (written) - every unrecognised argument is
#     appended here; NOTE(review): no reader of this array is visible in this
#     part of the file - confirm it is still needed
# Arguments:
#   "$@" of the script
# Returns:
#   0 on success; exits with status 2 when -p is given without a
#   non-negative integer value
#######################################
parse_options() {
  local opt
  while [[ $# -gt 0 ]]; do
    opt="$1"
    case "$opt" in
      -s | --enable-spdk)
        enable_spdk=true
        ;;
      -p | --expected-nr-hugepages)
        # A missing or non-numeric value would later make the numeric
        # hugepage comparison error out and be treated as a pass, so reject
        # it here.
        if [[ $# -lt 2 || ! "$2" =~ ^[0-9]+$ ]]; then
          error "Option $opt requires a non-negative integer argument"
          exit 2
        fi
        expected_nr_hugepages="$2"
        shift
        ;;
      -h | --help)
        show_help
        ;;
      *)
        instance_manager_options+=("$1")
        ;;
    esac
    shift
  done
}

parse_options "$@"
|
||||||
|
|
||||||
######################################################
|
######################################################
|
||||||
# Main logics
|
# Main logics
|
||||||
######################################################
|
######################################################
|
||||||
@ -375,10 +455,16 @@ trap cleanup EXIT
|
|||||||
create_ds
|
create_ds
|
||||||
wait_ds_ready
|
wait_ds_ready
|
||||||
|
|
||||||
check_nfs_client_kernel_support
|
|
||||||
check_package_installed
|
|
||||||
check_iscsid
|
|
||||||
check_multipathd
|
|
||||||
check_mount_propagation
|
check_mount_propagation
|
||||||
|
check_nodes "iscsid" check_iscsid
|
||||||
|
check_nodes "multipathd" check_multipathd
|
||||||
|
check_nodes "packages" check_packages
|
||||||
|
check_nodes "nfs client" check_nfs_client
|
||||||
|
|
||||||
|
if [ "$enable_spdk" = "true" ]; then
|
||||||
|
check_nodes "kernel module nvme_tcp" check_kernel_module CONFIG_NVME_TCP nvme_tcp
|
||||||
|
check_nodes "kernel module uio" check_kernel_module CONFIG_UIO uio
|
||||||
|
check_nodes "hugepage" check_hugepage ${expected_nr_hugepages}
|
||||||
|
fi
|
||||||
|
|
||||||
exit 0
|
exit 0
|
||||||
|
Loading…
Reference in New Issue
Block a user