From f176517638bfdbb8be081be144493a71cecb4c88 Mon Sep 17 00:00:00 2001
From: Joshua Moody
Date: Thu, 30 Jul 2020 17:34:26 -0700
Subject: [PATCH] Add scale test scripts

scale-test.py is currently a work in progress and needs some additional
implementation for the data processing and analysis part.

Signed-off-by: Joshua Moody
---
 dev/scale-test/.gitignore       |  12 ++++
 dev/scale-test/README.md        |  27 +++++++
 dev/scale-test/sample.sh        |  19 +++++
 dev/scale-test/scale-test.py    | 124 ++++++++++++++++++++++++++++++++
 dev/scale-test/statefulset.yaml |  41 +++++++++++
 5 files changed, 223 insertions(+)
 create mode 100644 dev/scale-test/.gitignore
 create mode 100644 dev/scale-test/README.md
 create mode 100755 dev/scale-test/sample.sh
 create mode 100644 dev/scale-test/scale-test.py
 create mode 100644 dev/scale-test/statefulset.yaml

diff --git a/dev/scale-test/.gitignore b/dev/scale-test/.gitignore
new file mode 100644
index 0000000..86383e9
--- /dev/null
+++ b/dev/scale-test/.gitignore
@@ -0,0 +1,12 @@
+# ignore all GoLand project folders and files
+.idea
+*.iml
+*.ipr
+
+# ignore output folder
+out
+tmp
+results
+
+# ignore kubeconfig
+kubeconfig
\ No newline at end of file
diff --git a/dev/scale-test/README.md b/dev/scale-test/README.md
new file mode 100644
index 0000000..5faa27e
--- /dev/null
+++ b/dev/scale-test/README.md
@@ -0,0 +1,27 @@
+## Overview
+scale-test is a collection of developer scripts used for scaling a cluster to a certain number of volumes
+while monitoring the time required to complete these actions.
+`sample.sh` can be used to quickly see how long it takes for the requested number of volumes to be up and usable.
+`scale-test.py` can be used to create the requested number of statefulsets based on the `statefulset.yaml` template,
+as well as retrieve detailed timing information per volume.
+
+
+### scale-test.py
+scale-test.py watches `pod`, `pvc` and `va` events (ADDED, MODIFIED, DELETED).
+Based on that information we can calculate the time each action takes for each individual pod.
+
+In addition, scale-test.py can also be used to create a set of statefulset deployment files
+based on the `statefulset.yaml` template, with the following variables substituted based on the current sts index:
+`@NODE_NAME@` - schedule each sts on a dedicated node
+`@STS_NAME@` - also used for the volume name
+
+Make sure to set the correct CONSTANT values in scale-test.py before running.
+
+
+### sample.sh
+sample.sh can be used to scale to a requested number of volumes based on the existing statefulsets
+and node count of the current cluster.
+
+Pass the requested number of volumes as well as the node count of the current cluster.
+Example for 1000 volumes and 100 nodes: `./sample.sh 1000 100`
+This expects there to be a statefulset deployment for each node.
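+
+
+### Example workflow
+One possible end-to-end run for 1000 volumes on a 100 node cluster (applying the generated manifests with
+`kubectl apply` is only a suggestion; the scripts themselves do not apply them):
+1. set the CONSTANT values (`NAMESPACE`, `NODE_PREFIX`, `NODE_COUNT`, `TEMPLATE_FILE`) in `scale-test.py`
+2. run `python3 scale-test.py` to generate one statefulset manifest per node under `out/` and start the event monitor
+3. in a second shell, apply the generated manifests with `kubectl apply -f out/`
+4. scale up with `./sample.sh 1000 100` and watch the logged events for the per-volume timings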
diff --git a/dev/scale-test/sample.sh b/dev/scale-test/sample.sh
new file mode 100755
index 0000000..173c98e
--- /dev/null
+++ b/dev/scale-test/sample.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+requested=${1:-0}
+node_count=${2:-1}
+required_scale=$((requested / node_count))
+
+now=$(date)
+ready=$(kubectl get pods -o custom-columns=NAMESPACE:metadata.namespace,POD:metadata.name,PodIP:status.podIP,READY:status.containerStatuses[*].ready | grep -c true)
+echo "$ready -- $now - start state"
+
+cmd=$(kubectl scale --replicas="$required_scale" statefulset --all)
+echo "$cmd"
+while [ "$ready" -ne "$requested" ]; do
+  sleep 60
+  now=$(date)
+  ready=$(kubectl get pods -o custom-columns=NAMESPACE:metadata.namespace,POD:metadata.name,PodIP:status.podIP,READY:status.containerStatuses[*].ready | grep -c true)
+  echo "$ready -- $now - delta:"
+done
+echo "$requested -- $now - done state"
\ No newline at end of file
diff --git a/dev/scale-test/scale-test.py b/dev/scale-test/scale-test.py
new file mode 100644
index 0000000..518cf92
--- /dev/null
+++ b/dev/scale-test/scale-test.py
@@ -0,0 +1,124 @@
+import sys
+import asyncio
+import logging
+from pathlib import Path
+from kubernetes import client, config, watch
+
+NAMESPACE = "default"
+NODE_PREFIX = "jmoody-work"
+NODE_COUNT = 100
+TEMPLATE_FILE = "statefulset.yaml"
+KUBE_CONFIG = None
+KUBE_CONTEXT = None
+# KUBE_CONFIG = "kubeconfig"
+# KUBE_CONTEXT = "jmoody-test-jmoody-control2"
+
+
+def create_sts_deployment(count):
+    # @NODE_NAME@ - schedule each sts on a dedicated node
+    # @STS_NAME@ - also used for the volume-name
+    # create one statefulset deployment file per node
+    for i in range(count):
+        create_sts_yaml(i + 1)
+
+
+def create_sts_yaml(index):
+    content = Path(TEMPLATE_FILE).read_text()
+    content = content.replace("@NODE_NAME@", NODE_PREFIX + str(index))
+    content = content.replace("@STS_NAME@", "sts" + str(index))
+    file = Path("out/sts" + str(index) + ".yaml")
+    file.parent.mkdir(parents=True, exist_ok=True)
+    file.write_text(content)
+
+
+async def watch_pods_async():
+    log = logging.getLogger('pod_events')
+    log.setLevel(logging.INFO)
+    v1 = client.CoreV1Api()
+    w = watch.Watch()
+    for event in w.stream(v1.list_namespaced_pod, namespace=NAMESPACE):
+        process_pod_event(log, event)
+        await asyncio.sleep(0)
+
+
+def process_pod_event(log, event):
+    log.info("Event: %s %s %s" % (event['type'], event['object'].kind, event['object'].metadata.name))
+    if 'ADDED' in event['type']:
+        pass
+    elif 'DELETED' in event['type']:
+        pass
+    else:
+        pass
+
+
+async def watch_pvc_async():
+    log = logging.getLogger('pvc_events')
+    log.setLevel(logging.INFO)
+    v1 = client.CoreV1Api()
+    w = watch.Watch()
+    for event in w.stream(v1.list_namespaced_persistent_volume_claim, namespace=NAMESPACE):
+        process_pvc_event(log, event)
+        await asyncio.sleep(0)
+
+
+def process_pvc_event(log, event):
+    log.info("Event: %s %s %s" % (event['type'], event['object'].kind, event['object'].metadata.name))
+    if 'ADDED' in event['type']:
+        pass
+    elif 'DELETED' in event['type']:
+        pass
+    else:
+        pass
+
+
+async def watch_va_async():
+    log = logging.getLogger('va_events')
+    log.setLevel(logging.INFO)
+    storage = client.StorageV1Api()
+    w = watch.Watch()
+    for event in w.stream(storage.list_volume_attachment):
+        process_va_event(log, event)
+        await asyncio.sleep(0)
+
+
+def process_va_event(log, event):
+    log.info("Event: %s %s %s" % (event['type'], event['object'].kind, event['object'].metadata.name))
+    if 'ADDED' in event['type']:
+        pass
+    elif 'DELETED' in event['type']:
+        pass
+    else:
+        pass
+
+
+if __name__ == '__main__':
+    # create the sts deployment files
+    create_sts_deployment(NODE_COUNT)
+
+    # set up the monitor
+    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    logging.basicConfig(stream=sys.stdout,
+                        level=logging.INFO,
+                        format=log_format)
+    config.load_kube_config(config_file=KUBE_CONFIG,
+                            context=KUBE_CONTEXT)
+    logging.info("scale-test started")
+
+    # data structures to keep track of the timings
+    # TODO: process events and keep track of the results
+    #  results should be per pod/volume
+    #  information to keep track of: pod index per sts
+    #  volume-creation time per pod
+    #  volume-attach time per pod
+    #  volume-detach time per pod
+    pvc_to_va_map = dict()
+    pvc_to_pod_map = dict()
+    results = dict()
+
+    # start async event_loop
+    event_loop = asyncio.get_event_loop()
+    event_loop.create_task(watch_pods_async())
+    event_loop.create_task(watch_pvc_async())
+    event_loop.create_task(watch_va_async())
+    event_loop.run_forever()
+    logging.info("scale-test finished")
diff --git a/dev/scale-test/statefulset.yaml b/dev/scale-test/statefulset.yaml
new file mode 100644
index 0000000..2293f2f
--- /dev/null
+++ b/dev/scale-test/statefulset.yaml
@@ -0,0 +1,41 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: @STS_NAME@
+spec:
+  replicas: 0
+  serviceName: @STS_NAME@
+  selector:
+    matchLabels:
+      app: @STS_NAME@
+  template:
+    metadata:
+      labels:
+        app: @STS_NAME@
+    spec:
+      nodeName: @NODE_NAME@
+      restartPolicy: Always
+      terminationGracePeriodSeconds: 10
+      containers:
+        - name: '@STS_NAME@'
+          image: 'busybox:latest'
+          command: ["/bin/sh", "-ec", "while :; do echo '.'; sleep 5 ; done"]
+          livenessProbe:
+            exec:
+              command:
+                - ls
+                - /mnt/@STS_NAME@
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          volumeMounts:
+            - name: @STS_NAME@
+              mountPath: /mnt/@STS_NAME@
+  volumeClaimTemplates:
+    - metadata:
+        name: @STS_NAME@
+      spec:
+        accessModes: [ "ReadWriteOnce" ]
+        storageClassName: "longhorn"
+        resources:
+          requests:
+            storage: 1Gi