Add scale test scripts
scale-test.py is currently a work in progress and needs some additional implementation for the data processing and analysis part.

Signed-off-by: Joshua Moody <joshua.moody@rancher.com>
parent 0e4085b640
commit f176517638
dev/scale-test/.gitignore (vendored, new file, +12)
@@ -0,0 +1,12 @@
# ignores all goland project folders and files
.idea
*.iml
*.ipr

# ignore output folder
out
tmp
results

# ignore kubeconfig
kubeconfig
dev/scale-test/README.md (new file, +27)
@@ -0,0 +1,27 @@
## Overview

scale-test is a collection of developer scripts used to scale a cluster to a requested number of volumes
while monitoring the time required to complete these actions.
`sample.sh` can be used to quickly see how long it takes for the requested number of volumes to be up and usable.
`scale-test.py` can be used to create the requested number of statefulsets based on the `statefulset.yaml` template,
as well as to retrieve detailed timing information per volume.


### scale-test.py
scale-test.py watches `pod`, `pvc`, and `va` events (ADDED, MODIFIED, DELETED).
Based on that information we can calculate how long each action takes for each individual pod; one possible approach is sketched below.
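
The event processing and analysis part is still a work in progress (see the commit message). A minimal sketch of one way the per-pod timings could be derived, assuming each event is timestamped as it arrives; the names `creation_times`, `durations`, and `record_event` are illustrative and not part of scale-test.py:

```python
from datetime import datetime, timezone

creation_times = {}  # object name -> timestamp of its ADDED event
durations = {}       # object name -> seconds from ADDED until the object became ready

def record_event(event):
    obj = event['object']
    name = obj.metadata.name
    now = datetime.now(timezone.utc)
    if event['type'] == 'ADDED':
        creation_times[name] = now
    elif event['type'] == 'MODIFIED' and name in creation_times and name not in durations:
        # for a pod: consider it ready once every container status reports ready
        statuses = getattr(obj.status, 'container_statuses', None) or []
        if statuses and all(s.ready for s in statuses):
            durations[name] = (now - creation_times[name]).total_seconds()
```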

In addition, scale-test.py can also be used to create a set of statefulset deployment files
based on the `statefulset.yaml` template, with the following variables substituted based on the current sts index (see the example below):
`@NODE_NAME@` - schedule each sts on a dedicated node
`@STS_NAME@` - also used for the volume name

Make sure to set the correct CONSTANT values in scale-test.py before running.
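
For illustration, this is what `create_sts_yaml(1)` in scale-test.py does for sts index 1, assuming the default `NODE_PREFIX = "jmoody-work"`:

```python
from pathlib import Path

# Render statefulset.yaml for sts index 1 (mirrors create_sts_yaml(1) in scale-test.py).
content = Path("statefulset.yaml").read_text()
content = content.replace("@NODE_NAME@", "jmoody-work1")  # pin sts1 to the node jmoody-work1
content = content.replace("@STS_NAME@", "sts1")           # statefulset, service and volume name
out = Path("out/sts1.yaml")
out.parent.mkdir(parents=True, exist_ok=True)
out.write_text(content)
```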

### sample.sh
sample.sh can be used to scale to a requested number of volumes based on the existing statefulsets
and node count of the current cluster.

Pass the requested number of volumes as well as the node count of the current cluster.
Example for 1000 volumes and 100 nodes: `./sample.sh 1000 100`, which scales every statefulset to 1000 / 100 = 10 replicas.
This expects there to be a statefulset deployment for each node.
dev/scale-test/sample.sh (new executable file, +19)
@@ -0,0 +1,19 @@
#!/bin/bash

# requested total number of ready pods (one volume per pod) and the cluster's node count
requested=${1:-0}
node_count=${2:-1}
required_scale=$((requested / node_count))

now=$(date)
ready=$(kubectl get pods -o custom-columns=NAMESPACE:metadata.namespace,POD:metadata.name,PodIP:status.podIP,READY:status.containerStatuses[*].ready | grep -c true)
echo "$ready -- $now - start state"

# scale every statefulset so that the total pod count matches the requested amount
cmd=$(kubectl scale --replicas="$required_scale" statefulset --all)
echo "$cmd"
# poll once a minute until the requested number of pods report ready
while [ "$ready" -ne "$requested" ]; do
    sleep 60
    now=$(date)
    ready=$(kubectl get pods -o custom-columns=NAMESPACE:metadata.namespace,POD:metadata.name,PodIP:status.podIP,READY:status.containerStatuses[*].ready | grep -c true)
    echo "$ready -- $now - delta:"
done
echo "$requested -- $now - done state"
dev/scale-test/scale-test.py (new file, +124)
@@ -0,0 +1,124 @@
import sys
import asyncio
import logging
from pathlib import Path
from kubernetes import client, config, watch

NAMESPACE = "default"
NODE_PREFIX = "jmoody-work"
NODE_COUNT = 100
TEMPLATE_FILE = "statefulset.yaml"
KUBE_CONFIG = None
KUBE_CONTEXT = None
# KUBE_CONFIG = "kubeconfig"
# KUBE_CONTEXT = "jmoody-test-jmoody-control2"


def create_sts_deployment(count):
    # @NODE_NAME@ - schedule each sts on a dedicated node
    # @STS_NAME@ - also used for the volume-name
    # create 100 stateful-sets
    for i in range(count):
        create_sts_yaml(i + 1)


def create_sts_yaml(index):
    content = Path(TEMPLATE_FILE).read_text()
    content = content.replace("@NODE_NAME@", NODE_PREFIX + str(index))
    content = content.replace("@STS_NAME@", "sts" + str(index))
    file = Path("out/sts" + str(index) + ".yaml")
    file.parent.mkdir(parents=True, exist_ok=True)
    file.write_text(content)


async def watch_pods_async():
    log = logging.getLogger('pod_events')
    log.setLevel(logging.INFO)
    v1 = client.CoreV1Api()
    w = watch.Watch()
    for event in w.stream(v1.list_namespaced_pod, namespace=NAMESPACE):
        process_pod_event(log, event)
        await asyncio.sleep(0)


def process_pod_event(log, event):
    log.info("Event: %s %s %s" % (event['type'], event['object'].kind, event['object'].metadata.name))
    if 'ADDED' in event['type']:
        pass
    elif 'DELETED' in event['type']:
        pass
    else:
        pass


async def watch_pvc_async():
    log = logging.getLogger('pvc_events')
    log.setLevel(logging.INFO)
    v1 = client.CoreV1Api()
    w = watch.Watch()
    for event in w.stream(v1.list_namespaced_persistent_volume_claim, namespace=NAMESPACE):
        process_pvc_event(log, event)
        await asyncio.sleep(0)


def process_pvc_event(log, event):
    log.info("Event: %s %s %s" % (event['type'], event['object'].kind, event['object'].metadata.name))
    if 'ADDED' in event['type']:
        pass
    elif 'DELETED' in event['type']:
        pass
    else:
        pass


async def watch_va_async():
    log = logging.getLogger('va_events')
    log.setLevel(logging.INFO)
    storage = client.StorageV1Api()
    w = watch.Watch()
    for event in w.stream(storage.list_volume_attachment):
        process_va_event(log, event)
        await asyncio.sleep(0)


def process_va_event(log, event):
    log.info("Event: %s %s %s" % (event['type'], event['object'].kind, event['object'].metadata.name))
    if 'ADDED' in event['type']:
        pass
    elif 'DELETED' in event['type']:
        pass
    else:
        pass


if __name__ == '__main__':
    # create the sts deployment files
    create_sts_deployment(NODE_COUNT)

    # setup the monitor
    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format)
    config.load_kube_config(config_file=KUBE_CONFIG,
                            context=KUBE_CONTEXT)
    logging.info("scale-test started")

    # datastructures to keep track of the timings
    # TODO: process events and keep track of the results
    # results should be per pod/volume
    # information to keep track: pod index per sts
    # volume-creation time per pod
    # volume-attach time per pod
    # volume-detach time per pod
    pvc_to_va_map = dict()
    pvc_to_pod_map = dict()
    results = dict()
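
    # Illustrative sketch (the processing above is still TODO, not implemented in this commit):
    # one possible shape for `results`, keyed by pod name, covering the values listed above:
    #   results["sts1-0"] = {
    #       "sts_index": 1,                  # pod index per sts
    #       "volume_creation_seconds": 0.0,  # volume-creation time per pod
    #       "volume_attach_seconds": 0.0,    # volume-attach time per pod
    #       "volume_detach_seconds": 0.0,    # volume-detach time per pod
    #   }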

    # start async event_loop
    event_loop = asyncio.get_event_loop()
    event_loop.create_task(watch_pods_async())
    event_loop.create_task(watch_pvc_async())
    event_loop.create_task(watch_va_async())
    event_loop.run_forever()
    logging.info("scale-test-finished")
dev/scale-test/statefulset.yaml (new file, +41)
@@ -0,0 +1,41 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: @STS_NAME@
spec:
  replicas: 0
  serviceName: @STS_NAME@
  selector:
    matchLabels:
      app: @STS_NAME@
  template:
    metadata:
      labels:
        app: @STS_NAME@
    spec:
      nodeName: @NODE_NAME@
      restartPolicy: Always
      terminationGracePeriodSeconds: 10
      containers:
        - name: '@STS_NAME@'
          image: 'busybox:latest'
          command: ["/bin/sh", "-ec", "while :; do echo '.'; sleep 5 ; done"]
          livenessProbe:
            exec:
              command:
                - ls
                - /mnt/@STS_NAME@
            initialDelaySeconds: 5
            periodSeconds: 5
          volumeMounts:
            - name: @STS_NAME@
              mountPath: /mnt/@STS_NAME@
  volumeClaimTemplates:
    - metadata:
        name: @STS_NAME@
      spec:
        accessModes: [ "ReadWriteOnce" ]
        storageClassName: "longhorn"
        resources:
          requests:
            storage: 1Gi