Add quicker failover for NFS

Add tolerations so that the NFS provisioner pod gets evicted from a failing node after 60 seconds plus a 30-second grace period (relevant for the va recovery policy). Add liveness and readiness probes so that no traffic gets routed to a failed NFS server. Disable device-based fsids (major:minor), since our block device mapping can change from node to node, which makes the IDs unstable. Signed-off-by: Joshua Moody <joshua.moody@rancher.com>
2020-06-15 16:43:13 -07:00 · 2020-06-15 16:43:13 -07:00 · 8969d829f2
commit 8969d829f2
parent 51d693b42d
1 changed files with 28 additions and 2 deletions
--- a/examples/rwx/02-longhorn-nfs-provisioner.yaml
+++ b/examples/rwx/02-longhorn-nfs-provisioner.yaml
@@ -102,6 +102,7 @@ spec:
                - SYS_RESOURCE
          args:
            - "-provisioner=nfs.longhorn.io"
+            - "-device-based-fsids=false"
          env:
            - name: POD_IP
              valueFrom:
@@ -114,6 +115,20 @@ spec:
                fieldRef:
                  fieldPath: metadata.namespace
          imagePullPolicy: "IfNotPresent"
+          readinessProbe:
+            exec:
+              command:
+                - ls
+                - /export
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          livenessProbe:
+            exec:
+              command:
+                - ls
+                - /export
+            initialDelaySeconds: 5
+            periodSeconds: 5
          volumeMounts:
            - name: export-volume
              mountPath: /export
@@ -121,6 +136,17 @@ spec:
        - name: export-volume
          persistentVolumeClaim:
            claimName: longhorn-nfs-provisioner
+      # we want really quick failover
+      terminationGracePeriodSeconds: 30
+      tolerations:
+        - effect: NoExecute
+          key: node.kubernetes.io/not-ready
+          operator: Exists
+          tolerationSeconds: 60
+        - effect: NoExecute
+          key: node.kubernetes.io/unreachable
+          operator: Exists
+          tolerationSeconds: 60
 ---
 apiVersion: v1
 kind: PersistentVolumeClaim
@@ -139,5 +165,5 @@ kind: StorageClass
 metadata:
  name: longhorn-nfs # workload storage class
 provisioner: nfs.longhorn.io
-parameters:
-  mountOptions: "vers=4.1"
+mountOptions:
+  - "vers=4.1"