Add scale test scripts

scale-test.py is currently a work in progress and needs some additional
implementation for the data processing and analysis part.

Signed-off-by: Joshua Moody <joshua.moody@rancher.com>
Joshua Moody 2020-07-30 17:34:26 -07:00 committed by Sheng Yang
parent 0e4085b640
commit f176517638
5 changed files with 223 additions and 0 deletions

dev/scale-test/.gitignore

@@ -0,0 +1,12 @@
# ignore all GoLand project folders and files
.idea
*.iml
*.ipr
# ignore output folder
out
tmp
results
# ignore kubeconfig
kubeconfig

dev/scale-test/README.md

@@ -0,0 +1,27 @@
## Overview
scale-test is a collection of developer scripts used to scale a cluster to a certain number of volumes
while monitoring the time required to complete these actions.
`sample.sh` can be used to quickly see how long it takes for the requested number of volumes to be up and usable.
`scale-test.py` can be used to create the requested number of statefulsets based on the `statefulset.yaml` template,
as well as to retrieve detailed timing information per volume.
### scale-test.py
scale-test.py watches `pod`, `pvc` and `va` events (ADDED, MODIFIED, DELETED).
Based on these events, the time taken by each action can be calculated for each individual pod.
In addition, scale-test.py can be used to create a set of statefulset deployment files
based on the `statefulset.yaml` template, with the following variables substituted for each sts index:
`@NODE_NAME@` - schedules each sts on a dedicated node
`@STS_NAME@` - also used for the volume name
Make sure to set the correct CONSTANT values in scale-test.py before running.
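The data processing and analysis part is still a work in progress (see the commit message). Below is a minimal, illustrative sketch of how per-volume timings could be accumulated from the watched events; `record_timestamp` and `results` are hypothetical names and not part of the script yet:

```python
# Sketch only: accumulate per-object timestamps from watch events.
# record_timestamp and results are hypothetical names used for illustration.
from datetime import datetime, timezone

results = {}  # object name -> {phase: timestamp}

def record_timestamp(kind, event):
    # kind is one of 'pod', 'pvc', 'va'; event comes from kubernetes.watch.Watch().stream()
    name = event['object'].metadata.name
    ts = datetime.now(timezone.utc)
    entry = results.setdefault(name, {})
    if kind == 'pvc' and event['type'] == 'ADDED':
        entry['volume-creation-requested'] = ts
    elif kind == 'va' and event['type'] == 'ADDED':
        entry['volume-attach-requested'] = ts
    elif kind == 'va' and event['type'] == 'DELETED':
        entry['volume-detached'] = ts
```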
### sample.sh
sample.sh can be used to scale to a requested number of volumes based on the existing statefulsets
and the node count of the current cluster.
Pass the requested number of volumes as well as the node count of the current cluster.
Example for 1000 volumes and 100 nodes: `./sample.sh 1000 100`
This expects a statefulset deployment for each node, so in this example each of the 100 statefulsets is scaled to 10 replicas.
The statefulsets can be the ones generated by scale-test.py, applied as sketched below.
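A minimal sketch (not part of this commit) of applying the generated `out/sts<N>.yaml` files with the Python client, assuming the `kubernetes.utils.create_from_yaml` helper available in recent client releases:

```python
# Sketch only: apply the manifests rendered by scale-test.py into out/.
# Assumes the kubernetes Python client's utils.create_from_yaml helper is available.
from pathlib import Path

from kubernetes import client, config, utils

config.load_kube_config()
k8s_client = client.ApiClient()
for manifest in sorted(Path("out").glob("sts*.yaml")):
    utils.create_from_yaml(k8s_client, str(manifest))
```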

dev/scale-test/sample.sh (executable)

@@ -0,0 +1,19 @@
#!/bin/bash
# Usage: ./sample.sh <requested-volume-count> <node-count>
requested=${1:-0}
node_count=${2:-1}
# one statefulset per node, so split the requested volumes evenly across them
required_scale=$((requested / node_count))
now=$(date)
# count pods whose containers are all ready
ready=$(kubectl get pods -o custom-columns=NAMESPACE:metadata.namespace,POD:metadata.name,PodIP:status.podIP,READY:status.containerStatuses[*].ready | grep -c true)
echo "$ready -- $now - start state"
cmd=$(kubectl scale --replicas="$required_scale" statefulset --all)
echo "$cmd"
# poll once a minute until the requested number of pods is ready
while [ "$ready" -ne "$requested" ]; do
    sleep 60
    now=$(date)
    ready=$(kubectl get pods -o custom-columns=NAMESPACE:metadata.namespace,POD:metadata.name,PodIP:status.podIP,READY:status.containerStatuses[*].ready | grep -c true)
    echo "$ready -- $now - delta:"
done
echo "$requested -- $now - done state"

dev/scale-test/scale-test.py

@@ -0,0 +1,124 @@
import sys
import asyncio
import logging
from pathlib import Path

from kubernetes import client, config, watch

NAMESPACE = "default"
NODE_PREFIX = "jmoody-work"
NODE_COUNT = 100
TEMPLATE_FILE = "statefulset.yaml"
KUBE_CONFIG = None
KUBE_CONTEXT = None
# KUBE_CONFIG = "kubeconfig"
# KUBE_CONTEXT = "jmoody-test-jmoody-control2"


def create_sts_deployment(count):
    # @NODE_NAME@ - schedule each sts on a dedicated node
    # @STS_NAME@ - also used for the volume-name
    # create one statefulset yaml per node
    for i in range(count):
        create_sts_yaml(i + 1)


def create_sts_yaml(index):
    # substitute the template variables and write the result to out/sts<index>.yaml
    content = Path(TEMPLATE_FILE).read_text()
    content = content.replace("@NODE_NAME@", NODE_PREFIX + str(index))
    content = content.replace("@STS_NAME@", "sts" + str(index))
    file = Path("out/sts" + str(index) + ".yaml")
    file.parent.mkdir(parents=True, exist_ok=True)
    file.write_text(content)


async def watch_pods_async():
    log = logging.getLogger('pod_events')
    log.setLevel(logging.INFO)
    v1 = client.CoreV1Api()
    w = watch.Watch()
    for event in w.stream(v1.list_namespaced_pod, namespace=NAMESPACE):
        process_pod_event(log, event)
        # yield control so the other watchers get a chance to run
        await asyncio.sleep(0)


def process_pod_event(log, event):
    log.info("Event: %s %s %s" % (event['type'], event['object'].kind, event['object'].metadata.name))
    if 'ADDED' in event['type']:
        pass
    elif 'DELETED' in event['type']:
        pass
    else:
        pass


async def watch_pvc_async():
    log = logging.getLogger('pvc_events')
    log.setLevel(logging.INFO)
    v1 = client.CoreV1Api()
    w = watch.Watch()
    for event in w.stream(v1.list_namespaced_persistent_volume_claim, namespace=NAMESPACE):
        process_pvc_event(log, event)
        await asyncio.sleep(0)


def process_pvc_event(log, event):
    log.info("Event: %s %s %s" % (event['type'], event['object'].kind, event['object'].metadata.name))
    if 'ADDED' in event['type']:
        pass
    elif 'DELETED' in event['type']:
        pass
    else:
        pass


async def watch_va_async():
    log = logging.getLogger('va_events')
    log.setLevel(logging.INFO)
    storage = client.StorageV1Api()
    w = watch.Watch()
    for event in w.stream(storage.list_volume_attachment):
        process_va_event(log, event)
        await asyncio.sleep(0)


def process_va_event(log, event):
    log.info("Event: %s %s %s" % (event['type'], event['object'].kind, event['object'].metadata.name))
    if 'ADDED' in event['type']:
        pass
    elif 'DELETED' in event['type']:
        pass
    else:
        pass


if __name__ == '__main__':
    # create the sts deployment files
    create_sts_deployment(NODE_COUNT)

    # setup the monitor
    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format)
    config.load_kube_config(config_file=KUBE_CONFIG,
                            context=KUBE_CONTEXT)
    logging.info("scale-test started")

    # datastructures to keep track of the timings
    # TODO: process events and keep track of the results
    #   results should be per pod/volume
    #   information to keep track: pod index per sts
    #   volume-creation time per pod
    #   volume-attach time per pod
    #   volume-detach time per pod
    pvc_to_va_map = dict()
    pvc_to_pod_map = dict()
    results = dict()

    # start async event_loop
    event_loop = asyncio.get_event_loop()
    event_loop.create_task(watch_pods_async())
    event_loop.create_task(watch_pvc_async())
    event_loop.create_task(watch_va_async())
    event_loop.run_forever()
    logging.info("scale-test-finished")

dev/scale-test/statefulset.yaml

@@ -0,0 +1,41 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: @STS_NAME@
spec:
  replicas: 0
  serviceName: @STS_NAME@
  selector:
    matchLabels:
      app: @STS_NAME@
  template:
    metadata:
      labels:
        app: @STS_NAME@
    spec:
      nodeName: @NODE_NAME@
      restartPolicy: Always
      terminationGracePeriodSeconds: 10
      containers:
        - name: '@STS_NAME@'
          image: 'busybox:latest'
          command: ["/bin/sh", "-ec", "while :; do echo '.'; sleep 5 ; done"]
          livenessProbe:
            exec:
              command:
                - ls
                - /mnt/@STS_NAME@
            initialDelaySeconds: 5
            periodSeconds: 5
          volumeMounts:
            - name: @STS_NAME@
              mountPath: /mnt/@STS_NAME@
  volumeClaimTemplates:
    - metadata:
        name: @STS_NAME@
      spec:
        accessModes: [ "ReadWriteOnce" ]
        storageClassName: "longhorn"
        resources:
          requests:
            storage: 1Gi