From 3757bf9e6c0b3512b45153f3dc7da003107eb961 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 1 Aug 2018 20:38:51 -0700 Subject: [PATCH 01/44] Update README.md and yaml for v0.3-rc1 --- README.md | 321 +++++++++++------- deploy/backupstores/minio-backupstore.yaml | 65 ++++ .../nfs-backupstore.yaml} | 14 +- deploy/longhorn-gke.yaml | 302 ---------------- deploy/longhorn.yaml | 129 ++++--- examples/csi/example_pv.yaml | 50 +++ examples/example_storageclass.yaml | 41 +++ examples/{ => flexvolume}/example.yaml | 0 examples/{ => flexvolume}/example_pv.yaml | 0 .../storageclass.yaml | 0 10 files changed, 457 insertions(+), 465 deletions(-) create mode 100644 deploy/backupstores/minio-backupstore.yaml rename deploy/{example-backupstore.yaml => backupstores/nfs-backupstore.yaml} (59%) delete mode 100644 deploy/longhorn-gke.yaml create mode 100644 examples/csi/example_pv.yaml create mode 100644 examples/example_storageclass.yaml rename examples/{ => flexvolume}/example.yaml (100%) rename examples/{ => flexvolume}/example_pv.yaml (100%) rename deploy/example-storageclass.yaml => examples/storageclass.yaml (100%) diff --git a/README.md b/README.md index 946ea36..855fd93 100644 --- a/README.md +++ b/README.md @@ -19,113 +19,116 @@ Longhorn is 100% open source software. Project source code is spread across a nu [![Longhorn v0.2 Demo](https://asciinema.org/a/172720.png)](https://asciinema.org/a/172720?autoplay=1&loop=1&speed=2) -# Deploy on Kubernetes +# Requirements -## Requirements +## Minimal Requirements -1. Docker v1.13+ -2. Kubernetes v1.8+ -3. Make sure `curl`, `findmnt`, `grep`, `awk` and `blkid` has been installed in all nodes of the Kubernetes cluster. -4. Make sure `open-iscsi` has been installed in all nodes of the Kubernetes cluster. For GKE, recommended Ubuntu as guest OS image since it contains `open-iscsi` already. +1. Docker v1.13+ +2. Kubernetes v1.8+ +3. Make sure open-iscsi has been installed in all nodes of the Kubernetes cluster. For GKE, recommended Ubuntu as guest OS image since it contains open-iscsi already. -## Deployment -Create the deployment of Longhorn in your Kubernetes cluster is easy. For most Kubernetes setup (except GKE), you will only need to run the following command to install Longhorn: +## Kubernetes Driver Requirements + +Longhorn can be used in Kubernetes to provide persistent storage through either Longhorn Container Storage Interface (CSI) driver or Longhorn Flexvolume driver. Longhorn will automatically deploy one of the drivers, depends on user's Kubernetes cluster's setup. User can also specify the driver in the deployment yaml file. CSI is preferred. + +### Requirement for the CSI driver + +1. Kubernetes v1.10+ +1.1 CSI is in beta release for this version of Kubernetes, and enabled by default. +2. Mount Propagation feature gate enabled. +2.1 It's enabled by default in Kubernetes v1.10. But some early versions of RKE may not enable it. +3. If above conditions cannot be met, Longhorn will falls back to use Flexvolume driver. + +### Requirement for the Flexvolume driver + +1. Kubernetes v1.8+ +2. Make sure `curl`, `findmnt`, `grep`, `awk` and `blkid` has been installed in the every node of the Kubernetes cluster. +3. User need to know the volume plugin directory in order to setup the driver correctly. + 1. Rancher RKE: `/var/lib/kubelet/volumeplugins` + 2. Google GKE: `/home/kubernetes/flexvolume` + 3. For other distro, please find the correct directory by running `ps aux|grep kubelet` on the host and check the `--volume-plugin-dir` parameter. 
If there is none, it would be the default value `/usr/libexec/kubernetes/kubelet-plugins/volume/exec/` . + +# Deployment + +Create the deployment of Longhorn in your Kubernetes cluster is easy. + +If you're using Rancher RKE, or other distro with Kubernetes v1.10+ and Mount Propagation enabled, you can just do: ``` -kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/master/deploy/longhorn.yaml +kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/rc/deploy/longhorn.yaml ``` - -For Google Kubernetes Engine (GKE) users, see [here](#google-kubernetes-engine) before proceed. +If you're using Flexvolume driver with other Kubernetes Distro, replace the value of $FLEXVOLUME_DIR in the following command with your own Flexvolume Directory as specified above. +``` +FLEXVOLUME_DIR="/home/kubernetes/flexvolume/" +curl -s https://raw.githubusercontent.com/rancher/longhorn/rc/deploy/longhorn.yaml|sed "s#^\( *\)value: \"/var/lib/kubelet/volumeplugins\"#\1value: \"${FLEXVOLUME_DIR}\"#g" > longhorn.yaml +kubectl create -f longhorn.yaml +``` +For Google Kubernetes Engine (GKE) users, see [here](#google-kubernetes-engine) before proceed. Longhorn Manager and Longhorn Driver will be deployed as daemonsets in a separate namespace called `longhorn-system`, as you can see in the yaml file. When you see those pods has started correctly as follows, you've deployed the Longhorn successfully. +Deployed with CSI driver: ``` # kubectl -n longhorn-system get pod -NAME READY STATUS RESTARTS AGE -longhorn-flexvolume-driver-4dnx6 1/1 Running 0 1d -longhorn-flexvolume-driver-cqwj5 1/1 Running 0 1d -longhorn-flexvolume-driver-deployer-bc7b95b5b-sb9kr 1/1 Running 0 1d -longhorn-flexvolume-driver-q9h4f 1/1 Running 0 1d -longhorn-manager-dkdn9 1/1 Running 0 2h -longhorn-manager-l6npd 1/1 Running 0 2h -longhorn-manager-v4fz8 1/1 Running 0 2h -longhorn-ui-58796c68d-db4t6 1/1 Running 0 1h +NAME READY STATUS RESTARTS AGE +csi-attacher-0 1/1 Running 0 6h +csi-provisioner-0 1/1 Running 0 6h +engine-image-ei-57b85e25-8v65d 1/1 Running 0 7d +engine-image-ei-57b85e25-gjjs6 1/1 Running 0 7d +engine-image-ei-57b85e25-t2787 1/1 Running 0 7d +longhorn-csi-plugin-4cpk2 2/2 Running 0 6h +longhorn-csi-plugin-ll6mq 2/2 Running 0 6h +longhorn-csi-plugin-smlsh 2/2 Running 0 6h +longhorn-driver-deployer-7b5bdcccc8-fbncl 1/1 Running 0 6h +longhorn-manager-7x8x8 1/1 Running 0 6h +longhorn-manager-8kqf4 1/1 Running 0 6h +longhorn-manager-kln4h 1/1 Running 0 6h +longhorn-ui-f849dcd85-cgkgg 1/1 Running 0 5d +``` +Or with Flexvolume driver +``` +# kubectl -n longhorn-system get pod +NAME READY STATUS RESTARTS AGE +engine-image-ei-57b85e25-8v65d 1/1 Running 0 7d +engine-image-ei-57b85e25-gjjs6 1/1 Running 0 7d +engine-image-ei-57b85e25-t2787 1/1 Running 0 7d +longhorn-driver-deployer-5469b87b9c-b9gm7 1/1 Running 0 2h +longhorn-flexvolume-driver-lth5g 1/1 Running 0 2h +longhorn-flexvolume-driver-tpqf7 1/1 Running 0 2h +longhorn-flexvolume-driver-v9mrj 1/1 Running 0 2h +longhorn-manager-7x8x8 1/1 Running 0 9h +longhorn-manager-8kqf4 1/1 Running 0 9h +longhorn-manager-kln4h 1/1 Running 0 9h +longhorn-ui-f849dcd85-cgkgg 1/1 Running 0 5d ``` ## Access the UI + Use `kubectl -n longhorn-system get svc` to get the external service IP for UI: ``` NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE longhorn-backend ClusterIP 10.20.248.250 9500/TCP 58m longhorn-frontend LoadBalancer 10.20.245.110 100.200.200.123 80:30697/TCP 58m + ``` -If the Kubernetes Cluster supports creating LoadBalancer, user can then use 
`EXTERNAL-IP`(`100.200.200.123` in the case above) of `longhorn-frontend` to access the Longhorn UI. Otherwise the user can use `:` (port is `30697` in the case above) to access the UI. +If the Kubernetes Cluster supports creating LoadBalancer, user can then use `EXTERNAL-IP`(`100.200.200.123` in the case above) of `longhorn-frontend` to access the Longhorn UI. Otherwise the user can use `:` (port is `30697`in the case above) to access the UI. Longhorn UI would connect to the Longhorn Manager API, provides the overview of the system, the volume operations, and the snapshot/backup operations. It's highly recommended for the user to check out Longhorn UI. Notice the current UI is unauthenticated. -## How to use the Longhorn Volume in your pod +# Use the Longhorn with Kubernetes -There are serveral ways to use the Longhorn volume. +Longhorn provides persistent volume directly to Kubernetes through one of the Longhorn drivers. No matter which driver you're using, you can use Kubernetes StorageClass to provision your persistent volumes. -### Pod with Longhorn volume -The following YAML file shows the definition of a pod that makes the Longhorn attach a volume to be used by the pod. +Use following command to create a default Longhorn StorageClass named `longhorn`. ``` -apiVersion: v1 -kind: Pod -metadata: - name: volume-test - namespace: default -spec: - containers: - - name: volume-test - image: nginx:stable-alpine - imagePullPolicy: IfNotPresent - volumeMounts: - - name: voll - mountPath: /data - ports: - - containerPort: 80 - volumes: - - name: voll - flexVolume: - driver: "rancher.io/longhorn" - fsType: "ext4" - options: - size: "2Gi" - numberOfReplicas: "3" - staleReplicaTimeout: "20" - fromBackup: "" +kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/rc/deploy/example-storageclass.yaml ``` - -Notice this field in the YAML file: `flexVolume.driver "rancher.io/longhorn"`. It specifies that the Longhorn FlexVolume plug-in should be used. There are some option fields in `options` the user can fill in. - -Option | Required | Description -------------- | ----|--------- -size | Yes | Specify the capacity of the volume in longhorn and the unit should be `G` -numberOfReplicas | Yes | The number of replicas (HA feature) for volume in this Longhorn volume -fromBackup | No | Optional. Must be a Longhorn Backup URL. Specify where the user want to restore the volume from. - -### Storage class - -Longhorn supports dynamic provisioner function, which can create PV automatically for the user according to the spec of storage class and PVC. The user needs to create a new storage class in order to use it. The storage class example can be downloaded from [here](./deploy/example-storageclass.yaml) -``` -kind: StorageClass -apiVersion: storage.k8s.io/v1 -metadata: - name: longhorn -provisioner: rancher.io/longhorn -parameters: - numberOfReplicas: "3" - staleReplicaTimeout: "30" - fromBackup: "" -``` - Then user can create a PVC directly. For example: ``` apiVersion: v1 @@ -163,82 +166,162 @@ spec: persistentVolumeClaim: claimName: longhorn-volv-pvc ``` +More examples are available at `./examples/` -## Setup a TESTING ONLY NFS server for storing backups +# Feature Usage +### Snapshot +A snapshot in Longhorn represents a volume state at a given time, stored in the same location of volume data on physical disk of the host. Snapshot creation is instant in Longhorn. -Longhorn supports backing up mechanisms to export the user data out of the Longhorn system. 
Currently Longhorn supports backing up to a NFS server. In order to use this feature, you need to have a NFS server running and accessible in the Kubernetes cluster. Here we provide a simple way to setup a testing NFS server. +User can revert to any previous taken snapshot using the UI. Since Longhorn is a distributed block storage, please make sure the Longhorn volume is umounted from the host when revert to any previous snapshot, otherwise it will confuse the node filesystem and cause corruption. +### Backup +A backup in Longhorn represents a volume state at a given time, stored in the BackupStore which is outside of the Longhorn System. Backup creation will involving copying the data through the network, so it will take time. -WARNING: This NFS server won't save any data after you delete it. It's for TESTING ONLY. +A corresponding snapshot is needed for creating a backup. And user can choose to backup any snapshot previous created. +A BackupStore is a NFS server or S3 compatible server. + +A BackupTarget represents a BackupStore in the Longhorn System. The BackupTarget can be set at `Settings/General/BackupTarget` + +If user is using a S3 compatible server as the BackupTarget, the BackupTargetSecret is needed for authentication informations. User need to manually create it as a Kubernetes Secret in the `longhorn-system` namespace. See below for details. + +#### Setup a testing backupstore +We provides two testing purpose backupstore based on NFS server and Minio S3 server for testing, in `./deploy/backupstores`. + +Use following command to setup a Minio S3 server for BackupStore after `longhorn-system` was created. ``` -kubectl create -f deploy/example-backupstore.yaml -``` -It will create a simple NFS server in the `default` namespace, which can be addressed as `longhorn-test-nfs-svc.default` for other pods in the cluster. - -After this script completes, using the following URL as the Backup Target in the Longhorn setting: -``` -nfs://longhorn-test-nfs-svc.default:/opt/backupstore -``` -Open Longhorn UI, go to Setting, fill the Backup Target field with the URL above, click Save. Now you should able to use the backup feature of Longhorn. - -## Google Kubernetes Engine -The configuration yaml will be slight different for Google Kubernetes Engine (GKE): - -1. GKE requires user to manually claim himself as cluster admin to enable RBAC. User need to execute following command before create the Longhorn system using yaml files. -``` -kubectl create clusterrolebinding cluster-admin-binding --clusterrole=cluster-admin --user= -``` -In which `name@example.com` is the user's account name in GCE, and it's case sensitive. -See [here](https://cloud.google.com/kubernetes-engine/docs/how-to/role-based-access-control) for details. - -2. The default Flexvolume plugin directory is different with GKE 1.8+, which is at `/home/kubernetes/flexvolume`. 
User need to use following command instead: -``` -kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/master/deploy/longhorn-gke.yaml +kubectl -f https://raw.githubusercontent.com/rancher/longhorn/rc/deploy/backupstores/minio-backupstore.yaml ``` -User can also customerize the Flexvolume directory in the last part of the Longhorn system deployment yaml file, e.g.: +Now set `Settings/General/BackupTarget` to ``` - - name: FLEXVOLUME_DIR - value: "/home/kubernetes/flexvolume/" +http://minio-service.default:9000 ``` +And `Setttings/General/BackupTargetSecret` to +``` +minio-secret +``` +Click the `Backup` tab in the UI, it should report an empty list without error out. -See [Troubleshooting](#troubleshooting) for details. +### Recurring Snapshot and Backup +Longhorn supports recurring snapshot and backup for volumes. User only need to set when he/she wish to take the snapshot and/or backup, and how many snapshots/backups needs to be retains, then Longhorn will automatically create snapshot/backup for the user at that time, as long as the volume is attached to a node. + +User can find the setting for the recurring snapshot and backup in the `Volume Detail` page. + +### Multiple disks support +Longhorn supports to use more than one disk on the nodes to store the volume data. + +To add a new disk for a node, heading to `Node` tab, select one of the node, and click the edit disk icon. + +By default, `/var/lib/rancher/longhorn` on the host will be used for storing the volume data. + +To add any additional disks, user needs to: +1. Mount the disk on the host to a certain directory. +2. Add the path of the mounted disk into the disk list of the node. + +Longhorn will detect the storage information (e.g. maximum space, available space) about the disk automatically, and start scheduling to it if it's possible to accomodate the volume in there. A path mounted by the existing disk won't be allowed. + +User can reserve a certain amount of space of the disk to stop Longhorn from using it. It can be set in the `Space Reserved` field for the disk. It's useful for the non-dedicated storage disk on the node. + +Nodes and disks can be excluded from future scheduling. Notice any scheduled storage space won't be released automatically if the scheduling was disabled for the node. + +There are two global settings affect the scheduling of the volume as well. + +`StorageOverProvisioningPercentage` defines the upper bound of `ScheduledStorage / (MaximumStorage - ReservedStorage)` . The default value is `500` (%). That means we can schedule a total of 750 GiB Longhorn volumes on a 200 GiB disk with 50G reserved for the root file system. Because normally people won't use that large amount of data in the volume, and we store the volumes as sparse files. + +`StorageMinimalAvailablePercentage` defines when a disk cannot be scheduled with more volumes. The default value is `10` (%). The bigger value between `MaximumStorage * StorageMinimalAvailablePercentage / 100` and `MaximumStorage - ReservedStorage` will be used to determine if a disk is running low and cannot be scheduled with more volumes. + +Notice currently there is no guarantee that the space volumes used won't exceed the `StorageMinimalAvailablePercentage`, because: +1. Longhorn volume can be bigger than specified size, due to the snapshot contains the old state of the volume +2. And Longhorn is doing over-provisioning by default. 
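The arithmetic behind `StorageOverProvisioningPercentage` can be made concrete with a small sketch. The following Go snippet is illustrative only — it is not the longhorn-manager scheduler, and the function and variable names are made up for this example; it simply restates the bound described above and reproduces the 200 GiB disk / 50 GiB reserved / 500% case:

```
package main

import "fmt"

const GiB = int64(1) << 30

// schedulableStorage returns the upper bound on ScheduledStorage described
// above: (MaximumStorage - ReservedStorage) * StorageOverProvisioningPercentage / 100.
func schedulableStorage(maximumStorage, reservedStorage, overProvisioningPct int64) int64 {
	return (maximumStorage - reservedStorage) * overProvisioningPct / 100
}

// canSchedule checks whether one more volume still fits under that bound.
func canSchedule(scheduledStorage, volumeSize, maximumStorage, reservedStorage, overProvisioningPct int64) bool {
	return scheduledStorage+volumeSize <= schedulableStorage(maximumStorage, reservedStorage, overProvisioningPct)
}

func main() {
	// The example from the text: a 200 GiB disk with 50 GiB reserved and the
	// default 500% over-provisioning allows up to 750 GiB of scheduled volumes.
	fmt.Println(schedulableStorage(200*GiB, 50*GiB, 500) / GiB) // 750

	// A new 100 GiB volume no longer fits once 700 GiB are scheduled (800 > 750)...
	fmt.Println(canSchedule(700*GiB, 100*GiB, 200*GiB, 50*GiB, 500)) // false
	// ...but it still fits when only 600 GiB are scheduled (700 <= 750).
	fmt.Println(canSchedule(600*GiB, 100*GiB, 200*GiB, 50*GiB, 500)) // true
}
```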
## Uninstall Longhorn -In order to uninstall Longhorn, user need to remove all the volumes first: + +Longhorn CRD has finalizers in them, so user should delete the volumes and related resource first, give manager a chance to clean up after them. + +### 1. Clean up volume and related resources + ``` -kubectl -n longhorn-system delete lhv --all +kubectl -n longhorn-system delete volumes.longhorn.rancher.io --all + ``` -After confirming all the volumes are removed, then Longhorn can be easily uninstalled using: +Check the result using: + ``` -kubectl delete -f https://raw.githubusercontent.com/rancher/longhorn/master/deploy/longhorn.yaml +kubectl -n longhorn-system get volumes.longhorn.rancher.io +kubectl -n longhorn-system get engines.longhorn.rancher.io +kubectl -n longhorn-system get replicas.longhorn.rancher.io + ``` +Make sure all reports `No resources found.` before continuing. + +### 2. Clean up engine images and nodes + +``` +kubectl -n longhorn-system delete engineimages.longhorn.rancher.io --all +kubectl -n longhorn-system delete nodes.longhorn.rancher.io --all + +``` + +Check the result using: + +``` +kubectl -n longhorn-system get engineimages.longhorn.rancher.io +kubectl -n longhorn-system get nodes.longhorn.rancher.io + +``` + +Make sure all reports `No resources found.` before continuing. + +### 3. Uninstall Longhorn System +``` +kubectl delete -f https://raw.githubusercontent.com/rancher/longhorn/rc/deploy/longhorn.yaml +``` + +## Notes +### Google Kubernetes Engine + +The configuration yaml will be slight different for Google Kubernetes Engine (GKE): + +1. GKE requires user to manually claim himself as cluster admin to enable RBAC. User need to execute following command before create the Longhorn system using yaml files. + +``` +kubectl create clusterrolebinding cluster-admin-binding --clusterrole=cluster-admin --user= + +``` + +In which `name@example.com` is the user's account name in GCE, and it's case sensitive. See [here](https://cloud.google.com/kubernetes-engine/docs/how-to/role-based-access-control) for details. + +2. The default Flexvolume plugin directory is different with GKE 1.8+, which is at `/home/kubernetes/flexvolume`. User need to use following command instead: + +``` +FLEXVOLUME_DIR="/home/kubernetes/flexvolume/" +curl -s https://raw.githubusercontent.com/rancher/longhorn/rc/deploy/longhorn.yaml|sed "s#^\( *\)value: \"/var/lib/kubelet/volumeplugins\"#\1value: \"${FLEXVOLUME_DIR}\"#g" > longhorn.yaml +kubectl create -f longhorn.yaml +``` + +See [Troubleshooting](#troubleshooting) for details. + ## Troubleshooting ### Volume can be attached/detached from UI, but Kubernetes Pod/StatefulSet etc cannot use it Check if volume plugin directory has been set correctly. -By default, Kubernetes use `/usr/libexec/kubernetes/kubelet-plugins/volume/exec/` as the directory for volume plugin drivers, as stated in the [official document](https://github.com/kubernetes/community/blob/master/contributors/devel/flexvolume.md#prerequisites). +By default, Kubernetes use `/usr/libexec/kubernetes/kubelet-plugins/volume/exec/` as the directory for volume plugin drivers, as stated in the [official document](https://github.com/kubernetes/community/blob/master/contributors/devel/flexvolume.md#prerequisites). But some vendors may choose to change the directory due to various reasons. For example, GKE uses `/home/kubernetes/flexvolume`, and RKE uses `/var/lib/kubelet/volumeplugins`. 
-User can find the correct directory by running `ps aux|grep kubelet` on the host and check the `--volume-plugin-dir` parameter. If there is none, the default `/usr/libexec/kubernetes/kubelet-plugins/volume/exec/` will be used. +User can find the correct directory by running `ps aux|grep kubelet` on the host and check the `--volume-plugin-dir`parameter. If there is none, the default `/usr/libexec/kubernetes/kubelet-plugins/volume/exec/` will be used. ## License -Copyright (c) 2014-2018 [Rancher Labs, Inc.](http://rancher.com) -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at +Copyright (c) 2014-2018 [Rancher Labs, Inc.](http://rancher.com/) + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at [http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0) -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. diff --git a/deploy/backupstores/minio-backupstore.yaml b/deploy/backupstores/minio-backupstore.yaml new file mode 100644 index 0000000..23d7867 --- /dev/null +++ b/deploy/backupstores/minio-backupstore.yaml @@ -0,0 +1,65 @@ +apiVersion: v1 +kind: Secret +metadata: + name: minio-secret +type: Opaque +data: + AWS_ACCESS_KEY_ID: bG9uZ2hvcm4tdGVzdC1hY2Nlc3Mta2V5 # longhorn-test-access-key + AWS_SECRET_ACCESS_KEY: bG9uZ2hvcm4tdGVzdC1zZWNyZXQta2V5 # longhorn-test-secret-key + AWS_ENDPOINTS: aHR0cDovL21pbmlvLXNlcnZpY2UuZGVmYXVsdDo5MDAw # http://minio-service.default:9000 +--- +# same secret for longhorn-system namespace +apiVersion: v1 +kind: Secret +metadata: + name: minio-secret + namespace: longhorn-system +type: Opaque +data: + AWS_ACCESS_KEY_ID: bG9uZ2hvcm4tdGVzdC1hY2Nlc3Mta2V5 # longhorn-test-access-key + AWS_SECRET_ACCESS_KEY: bG9uZ2hvcm4tdGVzdC1zZWNyZXQta2V5 # longhorn-test-secret-key + AWS_ENDPOINTS: aHR0cDovL21pbmlvLXNlcnZpY2UuZGVmYXVsdDo5MDAw # http://minio-service.default:9000 +--- +apiVersion: v1 +kind: Pod +metadata: + name: longhorn-test-minio + labels: + app: longhorn-test-minio +spec: + volumes: + - name: minio-volume + emptyDir: {} + containers: + - name: minio + image: minio/minio + command: ["sh", "-c", "mkdir -p /storage/backupbucket && exec /usr/bin/minio server /storage"] + env: + - name: MINIO_ACCESS_KEY + valueFrom: + secretKeyRef: + name: minio-secret + key: AWS_ACCESS_KEY_ID + - name: MINIO_SECRET_KEY + valueFrom: + secretKeyRef: + name: minio-secret + key: AWS_SECRET_ACCESS_KEY + ports: + - containerPort: 9000 + volumeMounts: + - name: minio-volume + mountPath: "/storage" +--- +apiVersion: v1 +kind: Service +metadata: + name: minio-service +spec: + selector: + app: longhorn-test-minio + ports: + - port: 9000 + targetPort: 9000 + protocol: TCP + sessionAffinity: ClientIP diff --git a/deploy/example-backupstore.yaml 
b/deploy/backupstores/nfs-backupstore.yaml similarity index 59% rename from deploy/example-backupstore.yaml rename to deploy/backupstores/nfs-backupstore.yaml index 6143460..7f4ce9c 100644 --- a/deploy/example-backupstore.yaml +++ b/deploy/backupstores/nfs-backupstore.yaml @@ -5,6 +5,9 @@ metadata: labels: app: longhorn-test-nfs spec: + volumes: + - name: nfs-volume + emptyDir: {} containers: - name: longhorn-test-nfs-container image: janeczku/nfs-ganesha:latest @@ -16,10 +19,19 @@ spec: value: /opt/backupstore - name: PSEUDO_PATH value: /opt/backupstore - command: ["bash", "-c", "mkdir -p /opt/backupstore && /opt/start_nfs.sh"] + command: ["bash", "-c", "chmod 700 /opt/backupstore && /opt/start_nfs.sh | tee /var/log/ganesha.log"] securityContext: + privileged: true capabilities: add: ["SYS_ADMIN", "DAC_READ_SEARCH"] + volumeMounts: + - name: nfs-volume + mountPath: "/opt/backupstore" + livenessProbe: + exec: + command: ["bash", "-c", "grep \"No export entries found\" /var/log/ganesha.log > /dev/null 2>&1 ; [ $? -ne 0 ]"] + initialDelaySeconds: 5 + periodSeconds: 5 --- kind: Service apiVersion: v1 diff --git a/deploy/longhorn-gke.yaml b/deploy/longhorn-gke.yaml deleted file mode 100644 index e877543..0000000 --- a/deploy/longhorn-gke.yaml +++ /dev/null @@ -1,302 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: longhorn-system ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: longhorn-service-account - namespace: longhorn-system ---- -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: ClusterRole -metadata: - name: longhorn-role -rules: -- apiGroups: - - apiextensions.k8s.io - resources: - - customresourcedefinitions - verbs: - - "*" -- apiGroups: [""] - resources: ["pods", "events", "persistentvolumes", "persistentvolumeclaims", "nodes", "proxy/nodes"] - verbs: ["*"] -- apiGroups: ["extensions"] - resources: ["daemonsets"] - verbs: ["*"] -- apiGroups: ["batch"] - resources: ["jobs", "cronjobs"] - verbs: ["*"] -- apiGroups: ["storage.k8s.io"] - resources: ["storageclasses"] - verbs: ["*"] -- apiGroups: ["longhorn.rancher.io"] - resources: ["nodes"] - verbs: ["*"] -- apiGroups: ["longhorn.rancher.io"] - resources: ["volumes"] - verbs: ["*"] -- apiGroups: ["longhorn.rancher.io"] - resources: ["engines"] - verbs: ["*"] -- apiGroups: ["longhorn.rancher.io"] - resources: ["replicas"] - verbs: ["*"] -- apiGroups: ["longhorn.rancher.io"] - resources: ["settings"] - verbs: ["*"] ---- -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: ClusterRoleBinding -metadata: - name: longhorn-bind -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: longhorn-role -subjects: -- kind: ServiceAccount - name: longhorn-service-account - namespace: longhorn-system ---- -apiVersion: apiextensions.k8s.io/v1beta1 -kind: CustomResourceDefinition -metadata: - labels: - longhorn-manager: Engine - name: engines.longhorn.rancher.io -spec: - group: longhorn.rancher.io - names: - kind: Engine - listKind: EngineList - plural: engines - shortNames: - - lhe - singular: engine - scope: Namespaced - version: v1alpha1 ---- -apiVersion: apiextensions.k8s.io/v1beta1 -kind: CustomResourceDefinition -metadata: - labels: - longhorn-manager: Replica - name: replicas.longhorn.rancher.io -spec: - group: longhorn.rancher.io - names: - kind: Replica - listKind: ReplicaList - plural: replicas - shortNames: - - lhr - singular: replica - scope: Namespaced - version: v1alpha1 ---- -apiVersion: apiextensions.k8s.io/v1beta1 -kind: CustomResourceDefinition -metadata: - labels: - longhorn-manager: 
Setting - name: settings.longhorn.rancher.io -spec: - group: longhorn.rancher.io - names: - kind: Setting - listKind: SettingList - plural: settings - shortNames: - - lhs - singular: setting - scope: Namespaced - version: v1alpha1 ---- -apiVersion: apiextensions.k8s.io/v1beta1 -kind: CustomResourceDefinition -metadata: - labels: - longhorn-manager: Volume - name: volumes.longhorn.rancher.io -spec: - group: longhorn.rancher.io - names: - kind: Volume - listKind: VolumeList - plural: volumes - shortNames: - - lhv - singular: volume - scope: Namespaced - version: v1alpha1 ---- -apiVersion: extensions/v1beta1 -kind: DaemonSet -metadata: - labels: - app: longhorn-manager - name: longhorn-manager - namespace: longhorn-system -spec: - template: - metadata: - labels: - app: longhorn-manager - spec: - initContainers: - - name: init-container - image: rancher/longhorn-engine:de88734 - command: ['sh', '-c', 'cp /usr/local/bin/* /data/'] - volumeMounts: - - name: execbin - mountPath: /data/ - containers: - - name: longhorn-manager - image: rancher/longhorn-manager:010fe60 - imagePullPolicy: Always - securityContext: - privileged: true - command: - - longhorn-manager - - -d - - daemon - - --engine-image - - rancher/longhorn-engine:de88734 - - --manager-image - - rancher/longhorn-manager:010fe60 - - --service-account - - longhorn-service-account - ports: - - containerPort: 9500 - volumeMounts: - - name: dev - mountPath: /host/dev/ - - name: proc - mountPath: /host/proc/ - - name: varrun - mountPath: /var/run/ - - name: longhorn - mountPath: /var/lib/rancher/longhorn/ - - name: execbin - mountPath: /usr/local/bin/ - env: - - name: POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - volumes: - - name: dev - hostPath: - path: /dev/ - - name: proc - hostPath: - path: /proc/ - - name: varrun - hostPath: - path: /var/run/ - - name: longhorn - hostPath: - path: /var/lib/rancher/longhorn/ - - name: execbin - emptyDir: {} - serviceAccountName: longhorn-service-account ---- -kind: Service -apiVersion: v1 -metadata: - labels: - app: longhorn-manager - name: longhorn-backend - namespace: longhorn-system -spec: - selector: - app: longhorn-manager - ports: - - port: 9500 - targetPort: 9500 - sessionAffinity: ClientIP ---- -apiVersion: extensions/v1beta1 -kind: Deployment -metadata: - labels: - app: longhorn-ui - name: longhorn-ui - namespace: longhorn-system -spec: - replicas: 1 - template: - metadata: - labels: - app: longhorn-ui - spec: - containers: - - name: longhorn-ui - image: rancher/longhorn-ui:1455f4f - ports: - - containerPort: 8000 - env: - - name: LONGHORN_MANAGER_IP - value: "http://longhorn-backend:9500" ---- -kind: Service -apiVersion: v1 -metadata: - labels: - app: longhorn-ui - name: longhorn-frontend - namespace: longhorn-system -spec: - selector: - app: longhorn-ui - ports: - - port: 80 - targetPort: 8000 - type: LoadBalancer ---- -apiVersion: extensions/v1beta1 -kind: Deployment -metadata: - name: longhorn-flexvolume-driver-deployer - namespace: longhorn-system -spec: - replicas: 1 - template: - metadata: - labels: - app: longhorn-flexvolume-driver-deployer - spec: - containers: - - name: longhorn-flexvolume-driver-deployer - image: rancher/longhorn-manager:010fe60 - imagePullPolicy: Always - command: - - longhorn-manager - - -d - - deploy-flexvolume-driver - - --manager-image - - rancher/longhorn-manager:010fe60 - env: - - name: 
POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: FLEXVOLUME_DIR - value: "/home/kubernetes/flexvolume/" - serviceAccountName: longhorn-service-account ---- diff --git a/deploy/longhorn.yaml b/deploy/longhorn.yaml index 9c82e99..c95d3af 100644 --- a/deploy/longhorn.yaml +++ b/deploy/longhorn.yaml @@ -21,31 +21,22 @@ rules: verbs: - "*" - apiGroups: [""] - resources: ["pods", "events", "persistentvolumes", "persistentvolumeclaims", "nodes", "proxy/nodes"] + resources: ["pods", "events", "persistentvolumes", "persistentvolumeclaims", "nodes", "proxy/nodes", "pods/log", "secrets", "services"] verbs: ["*"] -- apiGroups: ["extensions"] - resources: ["daemonsets"] +- apiGroups: [""] + resources: ["namespaces"] + verbs: ["get", "list"] +- apiGroups: ["apps"] + resources: ["daemonsets", "statefulsets"] verbs: ["*"] - apiGroups: ["batch"] resources: ["jobs", "cronjobs"] verbs: ["*"] - apiGroups: ["storage.k8s.io"] - resources: ["storageclasses"] + resources: ["storageclasses", "volumeattachments"] verbs: ["*"] - apiGroups: ["longhorn.rancher.io"] - resources: ["nodes"] - verbs: ["*"] -- apiGroups: ["longhorn.rancher.io"] - resources: ["volumes"] - verbs: ["*"] -- apiGroups: ["longhorn.rancher.io"] - resources: ["engines"] - verbs: ["*"] -- apiGroups: ["longhorn.rancher.io"] - resources: ["replicas"] - verbs: ["*"] -- apiGroups: ["longhorn.rancher.io"] - resources: ["settings"] + resources: ["volumes", "engines", "replicas", "settings", "engineimages", "nodes"] verbs: ["*"] --- apiVersion: rbac.authorization.k8s.io/v1beta1 @@ -133,7 +124,43 @@ spec: scope: Namespaced version: v1alpha1 --- -apiVersion: extensions/v1beta1 +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + labels: + longhorn-manager: EngineImage + name: engineimages.longhorn.rancher.io +spec: + group: longhorn.rancher.io + names: + kind: EngineImage + listKind: EngineImageList + plural: engineimages + shortNames: + - lhei + singular: engineimage + scope: Namespaced + version: v1alpha1 +--- +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + labels: + longhorn-manager: Node + name: nodes.longhorn.rancher.io +spec: + group: longhorn.rancher.io + names: + kind: Node + listKind: NodeList + plural: nodes + shortNames: + - lhn + singular: node + scope: Namespaced + version: v1alpha1 +--- +apiVersion: apps/v1beta2 kind: DaemonSet metadata: labels: @@ -141,21 +168,17 @@ metadata: name: longhorn-manager namespace: longhorn-system spec: + selector: + matchLabels: + app: longhorn-manager template: metadata: labels: app: longhorn-manager spec: - initContainers: - - name: init-container - image: rancher/longhorn-engine:de88734 - command: ['sh', '-c', 'cp /usr/local/bin/* /data/'] - volumeMounts: - - name: execbin - mountPath: /data/ containers: - name: longhorn-manager - image: rancher/longhorn-manager:010fe60 + image: rancher/longhorn-manager:06a81b9 imagePullPolicy: Always securityContext: privileged: true @@ -164,9 +187,9 @@ spec: - -d - daemon - --engine-image - - rancher/longhorn-engine:de88734 + - rancher/longhorn-engine:31c42f0 - --manager-image - - rancher/longhorn-manager:010fe60 + - rancher/longhorn-manager:06a81b9 - --service-account - longhorn-service-account ports: @@ -180,8 +203,7 @@ spec: mountPath: /var/run/ - name: longhorn mountPath: /var/lib/rancher/longhorn/ - - name: execbin - mountPath: /usr/local/bin/ + mountPropagation: Bidirectional env: - name: 
POD_NAMESPACE valueFrom: @@ -208,8 +230,6 @@ spec: - name: longhorn hostPath: path: /var/lib/rancher/longhorn/ - - name: execbin - emptyDir: {} serviceAccountName: longhorn-service-account --- kind: Service @@ -227,7 +247,7 @@ spec: targetPort: 9500 sessionAffinity: ClientIP --- -apiVersion: extensions/v1beta1 +apiVersion: apps/v1beta2 kind: Deployment metadata: labels: @@ -236,6 +256,9 @@ metadata: namespace: longhorn-system spec: replicas: 1 + selector: + matchLabels: + app: longhorn-ui template: metadata: labels: @@ -243,7 +266,7 @@ spec: spec: containers: - name: longhorn-ui - image: rancher/longhorn-ui:1455f4f + image: rancher/longhorn-ui:47e0b2a ports: - containerPort: 8000 env: @@ -265,28 +288,40 @@ spec: targetPort: 8000 type: LoadBalancer --- -apiVersion: extensions/v1beta1 +apiVersion: apps/v1beta2 kind: Deployment metadata: - name: longhorn-flexvolume-driver-deployer + name: longhorn-driver-deployer namespace: longhorn-system spec: replicas: 1 + selector: + matchLabels: + app: longhorn-driver-deployer template: metadata: labels: - app: longhorn-flexvolume-driver-deployer + app: longhorn-driver-deployer spec: + initContainers: + - name: wait-longhorn-manager + image: rancher/longhorn-manager:06a81b9 + command: ['sh', '-c', 'while [ $(curl -m 1 -s -o /dev/null -w "%{http_code}" http://longhorn-backend:9500/v1) != "200" ]; do echo waiting; sleep 2; done'] containers: - - name: longhorn-flexvolume-driver-deployer - image: rancher/longhorn-manager:010fe60 + - name: longhorn-driver-deployer + image: rancher/longhorn-manager:06a81b9 imagePullPolicy: Always command: - longhorn-manager - -d - - deploy-flexvolume-driver + - deploy-driver - --manager-image - - rancher/longhorn-manager:010fe60 + - rancher/longhorn-manager:06a81b9 + - --manager-url + - http://longhorn-backend:9500/v1 + # manually choose "flexvolume" or "csi" + #- --driver + #- flexvolume env: - name: POD_NAMESPACE valueFrom: @@ -296,9 +331,17 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName + - name: SERVICE_ACCOUNT + valueFrom: + fieldRef: + fieldPath: spec.serviceAccountName - name: FLEXVOLUME_DIR - value: "" - #FOR GKE + value: "/var/lib/kubelet/volumeplugins" + # FOR RKE + #value: "/var/lib/kubelet/volumeplugins" + # FOR GKE #value: "/home/kubernetes/flexvolume/" + # For default or auto detection with Kubernetes <= v1.8 + #value: "" serviceAccountName: longhorn-service-account --- diff --git a/examples/csi/example_pv.yaml b/examples/csi/example_pv.yaml new file mode 100644 index 0000000..6943462 --- /dev/null +++ b/examples/csi/example_pv.yaml @@ -0,0 +1,50 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: longhorn-vol-pv +spec: + capacity: + storage: 2Gi + volumeMode: Filesystem + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Delete + csi: + driver: io.rancher.longhorn + fsType: ext4 + volumeAttributes: + numberOfReplicas: '3' + staleReplicaTimeout: '30' + volumeHandle: existing-longhorn-volume +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: longhorn-vol-pvc +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 2Gi + volumeName: longhorn-vol-pv +--- +apiVersion: v1 +kind: Pod +metadata: + name: volume-test + namespace: default +spec: + containers: + - name: volume-test + image: nginx:stable-alpine + imagePullPolicy: IfNotPresent + volumeMounts: + - name: vol + mountPath: /data + ports: + - containerPort: 80 + volumes: + - name: vol + persistentVolumeClaim: + claimName: longhorn-vol-pvc diff --git a/examples/example_storageclass.yaml 
b/examples/example_storageclass.yaml new file mode 100644 index 0000000..0f4e9bd --- /dev/null +++ b/examples/example_storageclass.yaml @@ -0,0 +1,41 @@ +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: longhorn +provisioner: rancher.io/longhorn +parameters: + numberOfReplicas: '3' + staleReplicaTimeout: '30' +reclaimPolicy: Delete +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: longhorn-vol-pvc +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 2Gi + storageClassName: longhorn +--- +apiVersion: v1 +kind: Pod +metadata: + name: volume-test + namespace: default +spec: + containers: + - name: volume-test + image: nginx:stable-alpine + imagePullPolicy: IfNotPresent + volumeMounts: + - name: vol + mountPath: /data + ports: + - containerPort: 80 + volumes: + - name: vol + persistentVolumeClaim: + claimName: longhorn-vol-pvc diff --git a/examples/example.yaml b/examples/flexvolume/example.yaml similarity index 100% rename from examples/example.yaml rename to examples/flexvolume/example.yaml diff --git a/examples/example_pv.yaml b/examples/flexvolume/example_pv.yaml similarity index 100% rename from examples/example_pv.yaml rename to examples/flexvolume/example_pv.yaml diff --git a/deploy/example-storageclass.yaml b/examples/storageclass.yaml similarity index 100% rename from deploy/example-storageclass.yaml rename to examples/storageclass.yaml From dfcf7ca8cfea704b4f875f593b283bcbb16fa89b Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 2 Aug 2018 00:13:34 -0700 Subject: [PATCH 02/44] Fix wrong path of storageclass.yaml --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 855fd93..6802d84 100644 --- a/README.md +++ b/README.md @@ -127,7 +127,7 @@ Longhorn provides persistent volume directly to Kubernetes through one of the Lo Use following command to create a default Longhorn StorageClass named `longhorn`. ``` -kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/rc/deploy/example-storageclass.yaml +kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/rc/examples/storageclass.yaml ``` Then user can create a PVC directly. For example: ``` From 3418431ead1c97d835b140698131e7a35615d88d Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 2 Aug 2018 00:18:17 -0700 Subject: [PATCH 03/44] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6802d84..97cf82c 100644 --- a/README.md +++ b/README.md @@ -189,7 +189,7 @@ We provides two testing purpose backupstore based on NFS server and Minio S3 ser Use following command to setup a Minio S3 server for BackupStore after `longhorn-system` was created. ``` -kubectl -f https://raw.githubusercontent.com/rancher/longhorn/rc/deploy/backupstores/minio-backupstore.yaml +kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/rc/deploy/backupstores/minio-backupstore.yaml ``` Now set `Settings/General/BackupTarget` to From 8374a052636514cc466a179f6ad92557b2ebbcd1 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 2 Aug 2018 08:23:49 -0700 Subject: [PATCH 04/44] Update README.md Correct URL --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 97cf82c..e64bed7 100644 --- a/README.md +++ b/README.md @@ -54,12 +54,12 @@ Create the deployment of Longhorn in your Kubernetes cluster is easy. 
If you're using Rancher RKE, or other distro with Kubernetes v1.10+ and Mount Propagation enabled, you can just do: ``` -kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/rc/deploy/longhorn.yaml +kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml ``` If you're using Flexvolume driver with other Kubernetes Distro, replace the value of $FLEXVOLUME_DIR in the following command with your own Flexvolume Directory as specified above. ``` FLEXVOLUME_DIR="/home/kubernetes/flexvolume/" -curl -s https://raw.githubusercontent.com/rancher/longhorn/rc/deploy/longhorn.yaml|sed "s#^\( *\)value: \"/var/lib/kubelet/volumeplugins\"#\1value: \"${FLEXVOLUME_DIR}\"#g" > longhorn.yaml +curl -s https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml|sed "s#^\( *\)value: \"/var/lib/kubelet/volumeplugins\"#\1value: \"${FLEXVOLUME_DIR}\"#g" > longhorn.yaml kubectl create -f longhorn.yaml ``` For Google Kubernetes Engine (GKE) users, see [here](#google-kubernetes-engine) before proceed. @@ -127,7 +127,7 @@ Longhorn provides persistent volume directly to Kubernetes through one of the Lo Use following command to create a default Longhorn StorageClass named `longhorn`. ``` -kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/rc/examples/storageclass.yaml +kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/examples/storageclass.yaml ``` Then user can create a PVC directly. For example: ``` @@ -189,7 +189,7 @@ We provides two testing purpose backupstore based on NFS server and Minio S3 ser Use following command to setup a Minio S3 server for BackupStore after `longhorn-system` was created. ``` -kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/rc/deploy/backupstores/minio-backupstore.yaml +kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/backupstores/minio-backupstore.yaml ``` Now set `Settings/General/BackupTarget` to @@ -277,7 +277,7 @@ Make sure all reports `No resources found.` before continuing. ### 3. Uninstall Longhorn System ``` -kubectl delete -f https://raw.githubusercontent.com/rancher/longhorn/rc/deploy/longhorn.yaml +kubectl delete -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml ``` ## Notes @@ -298,7 +298,7 @@ In which `name@example.com` is the user's account name in GCE, and it's case sen ``` FLEXVOLUME_DIR="/home/kubernetes/flexvolume/" -curl -s https://raw.githubusercontent.com/rancher/longhorn/rc/deploy/longhorn.yaml|sed "s#^\( *\)value: \"/var/lib/kubelet/volumeplugins\"#\1value: \"${FLEXVOLUME_DIR}\"#g" > longhorn.yaml +curl -s https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml|sed "s#^\( *\)value: \"/var/lib/kubelet/volumeplugins\"#\1value: \"${FLEXVOLUME_DIR}\"#g" > longhorn.yaml kubectl create -f longhorn.yaml ``` From 382d2a588ade4f4a25955334c35d8973109ba28d Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 2 Aug 2018 08:59:06 -0700 Subject: [PATCH 05/44] Update README.md Format fix. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e64bed7..03d269d 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,9 @@ Longhorn can be used in Kubernetes to provide persistent storage through either ### Requirement for the CSI driver 1. Kubernetes v1.10+ -1.1 CSI is in beta release for this version of Kubernetes, and enabled by default. + 1. 
CSI is in beta release for this version of Kubernetes, and enabled by default. 2. Mount Propagation feature gate enabled. -2.1 It's enabled by default in Kubernetes v1.10. But some early versions of RKE may not enable it. + 1. It's enabled by default in Kubernetes v1.10. But some early versions of RKE may not enable it. 3. If above conditions cannot be met, Longhorn will falls back to use Flexvolume driver. ### Requirement for the Flexvolume driver From 082817fb79053980dee260c93498a667672c1300 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 2 Aug 2018 10:54:19 -0700 Subject: [PATCH 06/44] Update README.md Fix typo for s3. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 03d269d..a76b10d 100644 --- a/README.md +++ b/README.md @@ -194,7 +194,7 @@ kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/dep Now set `Settings/General/BackupTarget` to ``` -http://minio-service.default:9000 +s3://minio-service.default:9000 ``` And `Setttings/General/BackupTargetSecret` to ``` From a5441c30dc621bd9456fd01d682a5f5f341407d8 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 2 Aug 2018 13:40:10 -0700 Subject: [PATCH 07/44] Update README.md Update backupstore URL --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a76b10d..2ef381e 100644 --- a/README.md +++ b/README.md @@ -194,7 +194,7 @@ kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/dep Now set `Settings/General/BackupTarget` to ``` -s3://minio-service.default:9000 +s3://backupbucket@us-east-1/backupstore ``` And `Setttings/General/BackupTargetSecret` to ``` From 50354e461112069e6f36e325d2dff31170317307 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 2 Aug 2018 16:39:12 -0700 Subject: [PATCH 08/44] Create ./docs/ for documents And move part of README.md to it. --- README.md | 66 +++-------------------------------------- docs/gke.md | 23 ++++++++++++++ docs/multidisk.md | 26 ++++++++++++++++ docs/troubleshooting.md | 12 ++++++++ 4 files changed, 65 insertions(+), 62 deletions(-) create mode 100644 docs/gke.md create mode 100644 docs/multidisk.md create mode 100644 docs/troubleshooting.md diff --git a/README.md b/README.md index 2ef381e..1bb01c8 100644 --- a/README.md +++ b/README.md @@ -207,36 +207,14 @@ Longhorn supports recurring snapshot and backup for volumes. User only need to s User can find the setting for the recurring snapshot and backup in the `Volume Detail` page. -### Multiple disks support -Longhorn supports to use more than one disk on the nodes to store the volume data. +## Other topics -To add a new disk for a node, heading to `Node` tab, select one of the node, and click the edit disk icon. - -By default, `/var/lib/rancher/longhorn` on the host will be used for storing the volume data. - -To add any additional disks, user needs to: -1. Mount the disk on the host to a certain directory. -2. Add the path of the mounted disk into the disk list of the node. - -Longhorn will detect the storage information (e.g. maximum space, available space) about the disk automatically, and start scheduling to it if it's possible to accomodate the volume in there. A path mounted by the existing disk won't be allowed. - -User can reserve a certain amount of space of the disk to stop Longhorn from using it. It can be set in the `Space Reserved` field for the disk. It's useful for the non-dedicated storage disk on the node. - -Nodes and disks can be excluded from future scheduling. 
Notice any scheduled storage space won't be released automatically if the scheduling was disabled for the node. - -There are two global settings affect the scheduling of the volume as well. - -`StorageOverProvisioningPercentage` defines the upper bound of `ScheduledStorage / (MaximumStorage - ReservedStorage)` . The default value is `500` (%). That means we can schedule a total of 750 GiB Longhorn volumes on a 200 GiB disk with 50G reserved for the root file system. Because normally people won't use that large amount of data in the volume, and we store the volumes as sparse files. - -`StorageMinimalAvailablePercentage` defines when a disk cannot be scheduled with more volumes. The default value is `10` (%). The bigger value between `MaximumStorage * StorageMinimalAvailablePercentage / 100` and `MaximumStorage - ReservedStorage` will be used to determine if a disk is running low and cannot be scheduled with more volumes. - -Notice currently there is no guarantee that the space volumes used won't exceed the `StorageMinimalAvailablePercentage`, because: -1. Longhorn volume can be bigger than specified size, due to the snapshot contains the old state of the volume -2. And Longhorn is doing over-provisioning by default. +### [Multiple disks support](./docs/multidisk.md) +### [Google Kubernetes Engine](./docs/gke.md) +### [Troubleshotting](./docs/troubleshooting.md) ## Uninstall Longhorn - Longhorn CRD has finalizers in them, so user should delete the volumes and related resource first, give manager a chance to clean up after them. ### 1. Clean up volume and related resources @@ -280,42 +258,6 @@ Make sure all reports `No resources found.` before continuing. kubectl delete -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml ``` -## Notes -### Google Kubernetes Engine - -The configuration yaml will be slight different for Google Kubernetes Engine (GKE): - -1. GKE requires user to manually claim himself as cluster admin to enable RBAC. User need to execute following command before create the Longhorn system using yaml files. - -``` -kubectl create clusterrolebinding cluster-admin-binding --clusterrole=cluster-admin --user= - -``` - -In which `name@example.com` is the user's account name in GCE, and it's case sensitive. See [here](https://cloud.google.com/kubernetes-engine/docs/how-to/role-based-access-control) for details. - -2. The default Flexvolume plugin directory is different with GKE 1.8+, which is at `/home/kubernetes/flexvolume`. User need to use following command instead: - -``` -FLEXVOLUME_DIR="/home/kubernetes/flexvolume/" -curl -s https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml|sed "s#^\( *\)value: \"/var/lib/kubelet/volumeplugins\"#\1value: \"${FLEXVOLUME_DIR}\"#g" > longhorn.yaml -kubectl create -f longhorn.yaml -``` - -See [Troubleshooting](#troubleshooting) for details. - -## Troubleshooting - -### Volume can be attached/detached from UI, but Kubernetes Pod/StatefulSet etc cannot use it - -Check if volume plugin directory has been set correctly. - -By default, Kubernetes use `/usr/libexec/kubernetes/kubelet-plugins/volume/exec/` as the directory for volume plugin drivers, as stated in the [official document](https://github.com/kubernetes/community/blob/master/contributors/devel/flexvolume.md#prerequisites). - -But some vendors may choose to change the directory due to various reasons. For example, GKE uses `/home/kubernetes/flexvolume`, and RKE uses `/var/lib/kubelet/volumeplugins`. 
- -User can find the correct directory by running `ps aux|grep kubelet` on the host and check the `--volume-plugin-dir`parameter. If there is none, the default `/usr/libexec/kubernetes/kubelet-plugins/volume/exec/` will be used. - ## License Copyright (c) 2014-2018 [Rancher Labs, Inc.](http://rancher.com/) diff --git a/docs/gke.md b/docs/gke.md new file mode 100644 index 0000000..1d266f9 --- /dev/null +++ b/docs/gke.md @@ -0,0 +1,23 @@ +# Google Kubernetes Engine + +The configuration yaml will be slight different for Google Kubernetes Engine (GKE): + +1. GKE requires user to manually claim himself as cluster admin to enable RBAC. User need to execute following command before create the Longhorn system using yaml files. + +``` +kubectl create clusterrolebinding cluster-admin-binding --clusterrole=cluster-admin --user= + +``` + +In which `name@example.com` is the user's account name in GCE, and it's case sensitive. See [here](https://cloud.google.com/kubernetes-engine/docs/how-to/role-based-access-control) for details. + +2. The default Flexvolume plugin directory is different with GKE 1.8+, which is at `/home/kubernetes/flexvolume`. User need to use following command instead: + +``` +FLEXVOLUME_DIR="/home/kubernetes/flexvolume/" +curl -s https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml|sed "s#^\( *\)value: \"/var/lib/kubelet/volumeplugins\"#\1value: \"${FLEXVOLUME_DIR}\"#g" > longhorn.yaml +kubectl create -f longhorn.yaml +``` + +See [Troubleshooting](./troubleshooting.md) for details. + diff --git a/docs/multidisk.md b/docs/multidisk.md new file mode 100644 index 0000000..0221f50 --- /dev/null +++ b/docs/multidisk.md @@ -0,0 +1,26 @@ +# Multiple disks support +Longhorn supports to use more than one disk on the nodes to store the volume data. + +To add a new disk for a node, heading to `Node` tab, select one of the node, and click the edit disk icon. + +By default, `/var/lib/rancher/longhorn` on the host will be used for storing the volume data. + +To add any additional disks, user needs to: +1. Mount the disk on the host to a certain directory. +2. Add the path of the mounted disk into the disk list of the node. + +Longhorn will detect the storage information (e.g. maximum space, available space) about the disk automatically, and start scheduling to it if it's possible to accomodate the volume in there. A path mounted by the existing disk won't be allowed. + +User can reserve a certain amount of space of the disk to stop Longhorn from using it. It can be set in the `Space Reserved` field for the disk. It's useful for the non-dedicated storage disk on the node. + +Nodes and disks can be excluded from future scheduling. Notice any scheduled storage space won't be released automatically if the scheduling was disabled for the node. + +There are two global settings affect the scheduling of the volume as well. + +`StorageOverProvisioningPercentage` defines the upper bound of `ScheduledStorage / (MaximumStorage - ReservedStorage)` . The default value is `500` (%). That means we can schedule a total of 750 GiB Longhorn volumes on a 200 GiB disk with 50G reserved for the root file system. Because normally people won't use that large amount of data in the volume, and we store the volumes as sparse files. + +`StorageMinimalAvailablePercentage` defines when a disk cannot be scheduled with more volumes. The default value is `10` (%). 
The bigger value between `MaximumStorage * StorageMinimalAvailablePercentage / 100` and `MaximumStorage - ReservedStorage` will be used to determine if a disk is running low and cannot be scheduled with more volumes. + +Notice currently there is no guarantee that the space volumes used won't exceed the `StorageMinimalAvailablePercentage`, because: +1. Longhorn volume can be bigger than specified size, due to the snapshot contains the old state of the volume +2. And Longhorn is doing over-provisioning by default. diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md new file mode 100644 index 0000000..7c26a16 --- /dev/null +++ b/docs/troubleshooting.md @@ -0,0 +1,12 @@ +## Troubleshooting + +### Volume can be attached/detached from UI, but Kubernetes Pod/StatefulSet etc cannot use it + +Check if volume plugin directory has been set correctly. + +By default, Kubernetes use `/usr/libexec/kubernetes/kubelet-plugins/volume/exec/` as the directory for volume plugin drivers, as stated in the [official document](https://github.com/kubernetes/community/blob/master/contributors/devel/flexvolume.md#prerequisites). + +But some vendors may choose to change the directory due to various reasons. For example, GKE uses `/home/kubernetes/flexvolume`, and RKE uses `/var/lib/kubelet/volumeplugins`. + +User can find the correct directory by running `ps aux|grep kubelet` on the host and check the `--volume-plugin-dir`parameter. If there is none, the default `/usr/libexec/kubernetes/kubelet-plugins/volume/exec/` will be used. + From 1c87568c866c0078ac4a6e7c309f7e25527b6294 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Sat, 4 Aug 2018 00:10:50 -0700 Subject: [PATCH 09/44] Sync with Longhorn Manager Manager commit: commit 193dc5704ef033196300780fa5f956ba8c32b828 Author: Sheng Yang Date: Fri Aug 3 23:02:16 2018 -0700 Update images Manager to: rancher/longhorn-manager:298b65f Engine to: rancher/longhorn-engine:e58683a UI to: rancher/longhorn-ui:829ebc8 --- deploy/longhorn.yaml | 14 ++++++------ examples/flexvolume/example_baseimage.yaml | 26 ++++++++++++++++++++++ 2 files changed, 33 insertions(+), 7 deletions(-) create mode 100644 examples/flexvolume/example_baseimage.yaml diff --git a/deploy/longhorn.yaml b/deploy/longhorn.yaml index c95d3af..56a07a4 100644 --- a/deploy/longhorn.yaml +++ b/deploy/longhorn.yaml @@ -178,7 +178,7 @@ spec: spec: containers: - name: longhorn-manager - image: rancher/longhorn-manager:06a81b9 + image: rancher/longhorn-manager:298b65f imagePullPolicy: Always securityContext: privileged: true @@ -187,9 +187,9 @@ spec: - -d - daemon - --engine-image - - rancher/longhorn-engine:31c42f0 + - rancher/longhorn-engine:e58683a - --manager-image - - rancher/longhorn-manager:06a81b9 + - rancher/longhorn-manager:298b65f - --service-account - longhorn-service-account ports: @@ -266,7 +266,7 @@ spec: spec: containers: - name: longhorn-ui - image: rancher/longhorn-ui:47e0b2a + image: rancher/longhorn-ui:829ebc8 ports: - containerPort: 8000 env: @@ -305,18 +305,18 @@ spec: spec: initContainers: - name: wait-longhorn-manager - image: rancher/longhorn-manager:06a81b9 + image: rancher/longhorn-manager:298b65f command: ['sh', '-c', 'while [ $(curl -m 1 -s -o /dev/null -w "%{http_code}" http://longhorn-backend:9500/v1) != "200" ]; do echo waiting; sleep 2; done'] containers: - name: longhorn-driver-deployer - image: rancher/longhorn-manager:06a81b9 + image: rancher/longhorn-manager:298b65f imagePullPolicy: Always command: - longhorn-manager - -d - deploy-driver - --manager-image - - 
rancher/longhorn-manager:06a81b9 + - rancher/longhorn-manager:298b65f - --manager-url - http://longhorn-backend:9500/v1 # manually choose "flexvolume" or "csi" diff --git a/examples/flexvolume/example_baseimage.yaml b/examples/flexvolume/example_baseimage.yaml new file mode 100644 index 0000000..2d25980 --- /dev/null +++ b/examples/flexvolume/example_baseimage.yaml @@ -0,0 +1,26 @@ +apiVersion: v1 +kind: Pod +metadata: + name: flexvol-baseimage + namespace: default +spec: + containers: + - name: flexvol-baseimage + image: nginx:stable-alpine + imagePullPolicy: IfNotPresent + volumeMounts: + - name: flexvol + mountPath: /usr/share/nginx/html + ports: + - containerPort: 80 + volumes: + - name: flexvol + flexVolume: + driver: "rancher.io/longhorn" + # fsType: "iso9660" + options: + size: "16Mi" + numberOfReplicas: "3" + staleReplicaTimeout: "20" + fromBackup: "" + baseImage: "rancher/longhorn-test-baseimage" From 4e9738962118baf7e427699f9ceb39ddf8571842 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Sat, 4 Aug 2018 00:31:04 -0700 Subject: [PATCH 10/44] Update README.md Add link to the example pvc and show the minio-secret yaml file. --- README.md | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1bb01c8..6e885f8 100644 --- a/README.md +++ b/README.md @@ -129,7 +129,14 @@ Use following command to create a default Longhorn StorageClass named `longhorn` ``` kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/examples/storageclass.yaml ``` -Then user can create a PVC directly. For example: + +Now you can create a pod using Longhorn like this: +``` +kubectl create -f https://github.com/rancher/longhorn/blob/v0.3-rc/examples/pvc.yaml +``` + +The yaml contains two parts: +1. Create a PVC using Longhorn StorageClass. ``` apiVersion: v1 kind: PersistentVolumeClaim @@ -144,7 +151,7 @@ spec: storage: 2Gi ``` -Then use it in the pod: +2. Use it in the a Pod as a persistent volume: ``` apiVersion: v1 kind: Pod @@ -202,6 +209,21 @@ minio-secret ``` Click the `Backup` tab in the UI, it should report an empty list without error out. +The `minio-secret` yaml looks like this: +``` +apiVersion: v1 +kind: Secret +metadata: + name: minio-secret + namespace: longhorn-system +type: Opaque +data: + AWS_ACCESS_KEY_ID: bG9uZ2hvcm4tdGVzdC1hY2Nlc3Mta2V5 # longhorn-test-access-key + AWS_SECRET_ACCESS_KEY: bG9uZ2hvcm4tdGVzdC1zZWNyZXQta2V5 # longhorn-test-secret-key + AWS_ENDPOINTS: aHR0cDovL21pbmlvLXNlcnZpY2UuZGVmYXVsdDo5MDAw # http://minio-service.default:9000 +``` +Notice the secret must be created in the `longhorn-system` namespace for Longhorn to access. + ### Recurring Snapshot and Backup Longhorn supports recurring snapshot and backup for volumes. User only need to set when he/she wish to take the snapshot and/or backup, and how many snapshots/backups needs to be retains, then Longhorn will automatically create snapshot/backup for the user at that time, as long as the volume is attached to a node. From a009f02e71d22637ce1892e47bad3213cd06c932 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Sat, 4 Aug 2018 00:32:54 -0700 Subject: [PATCH 11/44] Update README.md Correct link. 
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6e885f8..0145551 100644 --- a/README.md +++ b/README.md @@ -132,7 +132,7 @@ kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/exa Now you can create a pod using Longhorn like this: ``` -kubectl create -f https://github.com/rancher/longhorn/blob/v0.3-rc/examples/pvc.yaml +kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/examples/pvc.yaml ``` The yaml contains two parts: From 7cf33f33ddbea89ce6fa52b03c6dc2a084acbdc0 Mon Sep 17 00:00:00 2001 From: Michael William Le Nguyen Date: Mon, 6 Aug 2018 17:31:11 -0700 Subject: [PATCH 12/44] Add documentation for restoring for Stateful Set --- README.md | 3 +- docs/restore_statefulset.md | 221 ++++++++++++++++++++++++++++++++++++ 2 files changed, 223 insertions(+), 1 deletion(-) create mode 100644 docs/restore_statefulset.md diff --git a/README.md b/README.md index 0145551..2fda35e 100644 --- a/README.md +++ b/README.md @@ -222,7 +222,7 @@ data: AWS_SECRET_ACCESS_KEY: bG9uZ2hvcm4tdGVzdC1zZWNyZXQta2V5 # longhorn-test-secret-key AWS_ENDPOINTS: aHR0cDovL21pbmlvLXNlcnZpY2UuZGVmYXVsdDo5MDAw # http://minio-service.default:9000 ``` -Notice the secret must be created in the `longhorn-system` namespace for Longhorn to access. +Notice the secret must be created in the `longhorn-system` namespace for Longhorn to access. ### Recurring Snapshot and Backup Longhorn supports recurring snapshot and backup for volumes. User only need to set when he/she wish to take the snapshot and/or backup, and how many snapshots/backups needs to be retains, then Longhorn will automatically create snapshot/backup for the user at that time, as long as the volume is attached to a node. @@ -234,6 +234,7 @@ User can find the setting for the recurring snapshot and backup in the `Volume D ### [Multiple disks support](./docs/multidisk.md) ### [Google Kubernetes Engine](./docs/gke.md) ### [Troubleshotting](./docs/troubleshooting.md) +### [Restoring Stateful Set volumes](./docs/restore_statefulset.md) ## Uninstall Longhorn diff --git a/docs/restore_statefulset.md b/docs/restore_statefulset.md new file mode 100644 index 0000000..df880ad --- /dev/null +++ b/docs/restore_statefulset.md @@ -0,0 +1,221 @@ +# Restoring Volumes for Kubernetes Stateful Sets + +Longhorn supports restoring backups, and one of the use cases for this feature +is to restore data for use in a Kubernetes `Stateful Set`, which requires +restoring a volume for each replica that was backed up. + +To restore, follow the below instructions based on which plugin you have +deployed. The example below uses a Stateful Set with one volume attached to +each Pod and two replicas. + +- [CSI Instructions](#csi-instructions) +- [FlexVolume Instructions](#flexvolume-instructions) + +### CSI Instructions +1. Connect to the `Longhorn UI` page in your web browser. Under the `Backup` tab, +select the name of the Stateful Set volume. Click the dropdown menu of the +volume entry and restore it. Name the volume something that can easily be +referenced later for the `Persistent Volumes`. + - Repeat this step for each volume you need restored. + - For example, if restoring a Stateful Set with two replicas that had + volumes named `pvc-01a` and `pvc-02b`, the restore could look like this: + +| Backup Name | Restored Volume | +|-------------|-------------------| +| pvc-01a | statefulset-vol-0 | +| pvc-02b | statefulset-vol-1 | + +2. 
In Kubernetes, create a `Persistent Volume` for each Longhorn volume that was +created. Name the volumes something that can easily be referenced later for the +`Persistent Volume Claims`. `storage` capacity, `numberOfReplicas`, +`storageClassName`, and `volumeHandle` must be replaced below. In the example, +we're referencing `statefulset-vol-0` and `statefulset-vol-1` in Longhorn and +using `longhorn` as our `storageClassName`. + +```yaml +apiVersion: v1 +kind: PersistentVolume +metadata: + name: statefulset-vol-0 +spec: + capacity: + storage: # must match size of Longhorn volume + volumeMode: Filesystem + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Delete + csi: + driver: io.rancher.longhorn # driver must match this + fsType: ext4 + volumeAttributes: + numberOfReplicas: # must match Longhorn volume value + staleReplicaTimeout: '30' + volumeHandle: statefulset-vol-0 # must match volume name from Longhorn + storageClassName: longhorn # must be same name that we will use later +--- +apiVersion: v1 +kind: PersistentVolume +metadata: + name: statefulset-vol-1 +spec: + capacity: + storage: # must match size of Longhorn volume + volumeMode: Filesystem + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Delete + csi: + driver: io.rancher.longhorn # driver must match this + fsType: ext4 + volumeAttributes: + numberOfReplicas: # must match Longhorn volume value + staleReplicaTimeout: '30' + volumeHandle: statefulset-vol-1 # must match volume name from Longhorn + storageClassName: longhorn # must be same name that we will use later +``` + +3. Go to [General Instructions](#general-instructions). + +### FlexVolume Instructions +Because of the implementation of `FlexVolume`, creating the Longhorn volumes +from the `Longhorn UI` manually can be skipped. Instead, follow these +instructions: +1. Connect to the `Longhorn UI` page in your web browser. Under the `Backup` tab, +select the name of the `Stateful Set` volume. Click the dropdown menu of the +volume entry and select `Get URL`. + - Repeat this step for each volume you need restored. Save these URLs for the + next step. + - If using NFS backups, the URL will appear similar to: + - `nfs://longhorn-nfs-svc.default:/opt/backupstore?backup=backup-c57844b68923408f&volume=pvc-59b20247-99bf-11e8-8a92-be8835d7412a`. + - If using S3 backups, the URL will appear similar to: + - `s3://backupbucket@us-east-1/backupstore?backup=backup-1713a64cd2774c43&volume=longhorn-testvol-g1n1de` + +2. Similar to `Step 2` for CSI, create a `Persistent Volume` for each volume you +want to restore. `storage` capacity, `storageClassName`, and the FlexVolume +`options` must be replaced. This example uses `longhorn` as the +`storageClassName`. 
+ +```yaml +apiVersion: v1 +kind: PersistentVolume +metadata: + name: statefulset-vol-0 +spec: + capacity: + storage: # must match "size" parameter below + accessModes: + - ReadWriteOnce + storageClassName: longhorn # must be same name that we will use later + flexVolume: + driver: "rancher.io/longhorn" # driver must match this + fsType: "ext4" + options: + size: # must match "storage" parameter above + numberOfReplicas: + staleReplicaTimeout: + fromBackup: # must be set to Longhorn backup URL +--- +apiVersion: v1 +kind: PersistentVolume +metadata: + name: statefulset-vol-1 +spec: + capacity: + storage: # must match "size" parameter below + accessModes: + - ReadWriteOnce + storageClassName: longhorn # must be same name that we will use later + flexVolume: + driver: "rancher.io/longhorn" # driver must match this + fsType: "ext4" + options: + size: # must match "storage" parameter above + numberOfReplicas: + staleReplicaTimeout: + fromBackup: # must be set to Longhorn backup URL +``` + +3. Go to [General Instructions](#general_instructions). + +### General Instructions +**Make sure you have followed either the [CSI](#csi-instructions) or +[FlexVolume](#flexvolume-instructions) instructions before following the steps +in this section.** + +1. In the `namespace` the `Stateful Set` will be deployed in, create Persistent +Volume Claims **for each** `Persistent Volume`. + - The name of the `Persistent Volume Claim` must follow this naming scheme: + `--`. Stateful + Set Pods are zero-indexed. In this example, the name of the `Volume Claim + Template` is `data`, the name of the `Stateful Set` is `webapp`, and there + are two replicas, which are indexes `0` and `1`. + +```yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: data-webapp-0 + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 2Gi # must match size from earlier + storageClassName: longhorn # must match name from earlier + volumeName: statefulset-vol-0 # must reference Persistent Volume +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: data-webapp-1 + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 2Gi # must match size from earlier + storageClassName: longhorn # must match name from earlier + volumeName: statefulset-vol-1 # must reference Persistent Volume +``` + +2. Create the `Stateful Set`: + +```yaml +apiVersion: apps/v1beta2 +kind: StatefulSet +metadata: + name: webapp # match this with the pvc naming scheme +spec: + selector: + matchLabels: + app: nginx # has to match .spec.template.metadata.labels + serviceName: "nginx" + replicas: 2 # by default is 1 + template: + metadata: + labels: + app: nginx # has to match .spec.selector.matchLabels + spec: + terminationGracePeriodSeconds: 10 + containers: + - name: nginx + image: k8s.gcr.io/nginx-slim:0.8 + ports: + - containerPort: 80 + name: web + volumeMounts: + - name: data + mountPath: /usr/share/nginx/html + volumeClaimTemplates: + - metadata: + name: data # match this with the pvc naming scheme + spec: + accessModes: [ "ReadWriteOnce" ] + storageClassName: longhorn # must match name from earlier + resources: + requests: + storage: 2Gi # must match size from earlier +``` + +The restored data should now be accessible from inside the `Stateful Set` +`Pods`. 
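
As a final sanity check, the commands below can confirm that the claims are bound to the restored volumes and that the data is visible inside the pods. This is only an illustrative sketch using the example names from this document (`data-webapp-0/1`, `statefulset-vol-*`, and the `webapp` Stateful Set); substitute your own names as needed.

```
# Verify each Persistent Volume Claim is Bound to the intended restored volume.
kubectl get pvc data-webapp-0 data-webapp-1 \
  -o custom-columns=NAME:.metadata.name,STATUS:.status.phase,VOLUME:.spec.volumeName

# After the Stateful Set is up, check that the restored data is visible
# inside the first replica (the mount path from the example above).
kubectl exec webapp-0 -- ls /usr/share/nginx/html
```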
From 9c9b77b120efe5d5f5c3dc4f5c446257b705045e Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 8 Aug 2018 00:23:04 -0700 Subject: [PATCH 13/44] Add docs for upgrade and iSCSI --- README.md | 2 + docs/iscsi.md | 24 ++++++++++++ docs/upgrade.md | 100 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 126 insertions(+) create mode 100644 docs/iscsi.md create mode 100644 docs/upgrade.md diff --git a/README.md b/README.md index 2fda35e..32f1a15 100644 --- a/README.md +++ b/README.md @@ -231,7 +231,9 @@ User can find the setting for the recurring snapshot and backup in the `Volume D ## Other topics +### [Upgrade from v0.2](./docs/upgrade.md) ### [Multiple disks support](./docs/multidisk.md) +### [iSCSI support](./docs/iscsi.md) ### [Google Kubernetes Engine](./docs/gke.md) ### [Troubleshotting](./docs/troubleshooting.md) ### [Restoring Stateful Set volumes](./docs/restore_statefulset.md) diff --git a/docs/iscsi.md b/docs/iscsi.md new file mode 100644 index 0000000..ac51bc1 --- /dev/null +++ b/docs/iscsi.md @@ -0,0 +1,24 @@ +# iSCSI support + +Longhorn supports iSCSI target frontend mode. The user can connect to it +through any iSCSI client, including open-iscsi, and virtual machine +hypervisor like KVM, as long as it's in the same network with the Longhorn system. + +Longhorn Driver (CSI/Flexvolume) doesn't support iSCSI mode. + +To start volume with iSCSI target frontend mode, select `iSCSI` as the frontend +when creating the volume. After volume has been attached, the user will see +something like following in the `endpoint` field: + +``` +iscsi://10.42.0.21:3260/iqn.2014-09.com.rancher:testvolume/1 +``` + +Here: +1. The IP and port is `10.42.0.21:3260`. +2. The target name is `iqn.2014-09.com.rancher:testvolume`. `testvolume` is the + name of the volume. +3. The LUN number is 1. Longhorn always uses LUN 1. + +Then user can use above information to connect to the iSCSI target provided by +Longhorn using an iSCSI client. diff --git a/docs/upgrade.md b/docs/upgrade.md new file mode 100644 index 0000000..cefdc96 --- /dev/null +++ b/docs/upgrade.md @@ -0,0 +1,100 @@ +# Upgrade + +Here we would cover how to upgrade from Longhorn v0.2 to Longhorn v0.3 release. + +## Backup your existing data +1. It's recommended to create a latest backup for every volume to the backupstore before upgrade. +2. Make sure no volume is in degraded or faulted state. +3. Shutdown related Kubernetes pods. Detach all the volumes. Make sure all the volumes are detached before proceeding. +4. Backup CRD yaml to local directory: +``` +kubectl -n longhorn-system get volumes.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-volumes.yaml +kubectl -n longhorn-system get engines.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-engines.yaml +kubectl -n longhorn-system get replicas.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-replicas.yaml +kubectl -n longhorn-system get settings.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-settings.yaml +``` +5. Noted the value of BackupTarget in the setting. The user would need to reset after upgrade. + +## Upgrade from v0.2 to v0.3 + +Please be aware that the upgrade will incur API downtime. + +### 1. Remove the old manager +``` +kubectl delete -f https://raw.githubusercontent.com/rancher/longhorn/v0.2/deploy/uninstall-for-upgrade.yaml +``` + +### 2. Install the new manager + +We will use `kubectl apply` instead of `kubectl create` to install the new version of the manager. 
+ +If you're using Rancher RKE, or other distro with Kubernetes v1.10+ and Mount Propagation enabled, you can just do: +``` +kubectl apply -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml +``` +If you're using Flexvolume driver with other Kubernetes Distro, replace the value of $FLEXVOLUME_DIR in the following command with your own Flexvolume Directory as specified above. +``` +FLEXVOLUME_DIR="/home/kubernetes/flexvolume/" +curl -s https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml|sed "s#^\( *\)value: \"/var/lib/kubelet/volumeplugins\"#\1value: \"${FLEXVOLUME_DIR}\"#g" > longhorn.yaml +kubectl apply -f longhorn.yaml +``` + +For Google Kubernetes Engine (GKE) users, see [here](../gke.md) before proceed. + +Longhorn Manager and Longhorn Driver will be deployed as daemonsets in a separate namespace called `longhorn-system`, as you can see in the yaml file. + +When you see those pods has started correctly as follows, you've deployed the Longhorn successfully. + +Deployed with CSI driver: +``` +# kubectl -n longhorn-system get pod +NAME READY STATUS RESTARTS AGE +csi-attacher-0 1/1 Running 0 6h +csi-provisioner-0 1/1 Running 0 6h +engine-image-ei-57b85e25-8v65d 1/1 Running 0 7d +engine-image-ei-57b85e25-gjjs6 1/1 Running 0 7d +engine-image-ei-57b85e25-t2787 1/1 Running 0 7d +longhorn-csi-plugin-4cpk2 2/2 Running 0 6h +longhorn-csi-plugin-ll6mq 2/2 Running 0 6h +longhorn-csi-plugin-smlsh 2/2 Running 0 6h +longhorn-driver-deployer-7b5bdcccc8-fbncl 1/1 Running 0 6h +longhorn-manager-7x8x8 1/1 Running 0 6h +longhorn-manager-8kqf4 1/1 Running 0 6h +longhorn-manager-kln4h 1/1 Running 0 6h +longhorn-ui-f849dcd85-cgkgg 1/1 Running 0 5d +``` +Or with Flexvolume driver +``` +# kubectl -n longhorn-system get pod +NAME READY STATUS RESTARTS AGE +engine-image-ei-57b85e25-8v65d 1/1 Running 0 7d +engine-image-ei-57b85e25-gjjs6 1/1 Running 0 7d +engine-image-ei-57b85e25-t2787 1/1 Running 0 7d +longhorn-driver-deployer-5469b87b9c-b9gm7 1/1 Running 0 2h +longhorn-flexvolume-driver-lth5g 1/1 Running 0 2h +longhorn-flexvolume-driver-tpqf7 1/1 Running 0 2h +longhorn-flexvolume-driver-v9mrj 1/1 Running 0 2h +longhorn-manager-7x8x8 1/1 Running 0 9h +longhorn-manager-8kqf4 1/1 Running 0 9h +longhorn-manager-kln4h 1/1 Running 0 9h +longhorn-ui-f849dcd85-cgkgg 1/1 Running 0 5d +``` + +### 3. Upgrade Engine Images and set BackupTarget + +1. Wait until the UI is up. +2. Set the BackupTarget in the setting to the same value as before upgrade. +3. Make all the volumes are all detached. +4. Select all the volumes using batch selection. Click batch operation button + `Upgrade Engine`, choose the only engine image available in the list. It's + the default engine shipped with the manager for this release. +5. Now attach the volume one by one, to see if the volume works correctly. + +## Note + +Upgrade is always tricky. Keep backups for the volumes are critical. + +If you have any issues, please reported it at +https://github.com/rancher/longhorn/issues , with your backup yaml files as well +as manager logs. + From 7d4afb10ac2143abd8986a875602c121118e3c46 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 8 Aug 2018 00:30:01 -0700 Subject: [PATCH 14/44] Update upgrade.md Fix a typo. 
--- docs/upgrade.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/upgrade.md b/docs/upgrade.md index cefdc96..8c83a71 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -39,7 +39,7 @@ curl -s https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longho kubectl apply -f longhorn.yaml ``` -For Google Kubernetes Engine (GKE) users, see [here](../gke.md) before proceed. +For Google Kubernetes Engine (GKE) users, see [here](./gke.md) before proceed. Longhorn Manager and Longhorn Driver will be deployed as daemonsets in a separate namespace called `longhorn-system`, as you can see in the yaml file. From 86d57e9ff4a95c4615730ed00a4f705029cdad96 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 8 Aug 2018 03:52:37 -0700 Subject: [PATCH 15/44] Sync with Longhorn Manager Manager commit: commit 34bac041a58b1086361f6d7d653fcd9954ff104b Author: Sheng Yang Date: Wed Aug 8 01:09:34 2018 -0700 Update image Manager to rancher/longhorn-manager:9838a5f UI to rancher/longhorn-ui:99252cc --- deploy/longhorn.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/deploy/longhorn.yaml b/deploy/longhorn.yaml index 56a07a4..54b82d2 100644 --- a/deploy/longhorn.yaml +++ b/deploy/longhorn.yaml @@ -178,7 +178,7 @@ spec: spec: containers: - name: longhorn-manager - image: rancher/longhorn-manager:298b65f + image: rancher/longhorn-manager:9838a5f imagePullPolicy: Always securityContext: privileged: true @@ -189,7 +189,7 @@ spec: - --engine-image - rancher/longhorn-engine:e58683a - --manager-image - - rancher/longhorn-manager:298b65f + - rancher/longhorn-manager:9838a5f - --service-account - longhorn-service-account ports: @@ -266,7 +266,7 @@ spec: spec: containers: - name: longhorn-ui - image: rancher/longhorn-ui:829ebc8 + image: rancher/longhorn-ui:99252cc ports: - containerPort: 8000 env: @@ -305,18 +305,18 @@ spec: spec: initContainers: - name: wait-longhorn-manager - image: rancher/longhorn-manager:298b65f + image: rancher/longhorn-manager:9838a5f command: ['sh', '-c', 'while [ $(curl -m 1 -s -o /dev/null -w "%{http_code}" http://longhorn-backend:9500/v1) != "200" ]; do echo waiting; sleep 2; done'] containers: - name: longhorn-driver-deployer - image: rancher/longhorn-manager:298b65f + image: rancher/longhorn-manager:9838a5f imagePullPolicy: Always command: - longhorn-manager - -d - deploy-driver - --manager-image - - rancher/longhorn-manager:298b65f + - rancher/longhorn-manager:9838a5f - --manager-url - http://longhorn-backend:9500/v1 # manually choose "flexvolume" or "csi" From ac838a35a96a2059fb2dfcd91e450d8511f4833f Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 9 Aug 2018 13:33:20 -0700 Subject: [PATCH 16/44] docs: Remove Upgrade instruction for now It's not ready yet. 
--- README.md | 1 - docs/upgrade.md | 100 ------------------------------------------------ 2 files changed, 101 deletions(-) delete mode 100644 docs/upgrade.md diff --git a/README.md b/README.md index 32f1a15..8e14c58 100644 --- a/README.md +++ b/README.md @@ -231,7 +231,6 @@ User can find the setting for the recurring snapshot and backup in the `Volume D ## Other topics -### [Upgrade from v0.2](./docs/upgrade.md) ### [Multiple disks support](./docs/multidisk.md) ### [iSCSI support](./docs/iscsi.md) ### [Google Kubernetes Engine](./docs/gke.md) diff --git a/docs/upgrade.md b/docs/upgrade.md deleted file mode 100644 index 8c83a71..0000000 --- a/docs/upgrade.md +++ /dev/null @@ -1,100 +0,0 @@ -# Upgrade - -Here we would cover how to upgrade from Longhorn v0.2 to Longhorn v0.3 release. - -## Backup your existing data -1. It's recommended to create a latest backup for every volume to the backupstore before upgrade. -2. Make sure no volume is in degraded or faulted state. -3. Shutdown related Kubernetes pods. Detach all the volumes. Make sure all the volumes are detached before proceeding. -4. Backup CRD yaml to local directory: -``` -kubectl -n longhorn-system get volumes.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-volumes.yaml -kubectl -n longhorn-system get engines.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-engines.yaml -kubectl -n longhorn-system get replicas.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-replicas.yaml -kubectl -n longhorn-system get settings.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-settings.yaml -``` -5. Noted the value of BackupTarget in the setting. The user would need to reset after upgrade. - -## Upgrade from v0.2 to v0.3 - -Please be aware that the upgrade will incur API downtime. - -### 1. Remove the old manager -``` -kubectl delete -f https://raw.githubusercontent.com/rancher/longhorn/v0.2/deploy/uninstall-for-upgrade.yaml -``` - -### 2. Install the new manager - -We will use `kubectl apply` instead of `kubectl create` to install the new version of the manager. - -If you're using Rancher RKE, or other distro with Kubernetes v1.10+ and Mount Propagation enabled, you can just do: -``` -kubectl apply -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml -``` -If you're using Flexvolume driver with other Kubernetes Distro, replace the value of $FLEXVOLUME_DIR in the following command with your own Flexvolume Directory as specified above. -``` -FLEXVOLUME_DIR="/home/kubernetes/flexvolume/" -curl -s https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml|sed "s#^\( *\)value: \"/var/lib/kubelet/volumeplugins\"#\1value: \"${FLEXVOLUME_DIR}\"#g" > longhorn.yaml -kubectl apply -f longhorn.yaml -``` - -For Google Kubernetes Engine (GKE) users, see [here](./gke.md) before proceed. - -Longhorn Manager and Longhorn Driver will be deployed as daemonsets in a separate namespace called `longhorn-system`, as you can see in the yaml file. - -When you see those pods has started correctly as follows, you've deployed the Longhorn successfully. 
- -Deployed with CSI driver: -``` -# kubectl -n longhorn-system get pod -NAME READY STATUS RESTARTS AGE -csi-attacher-0 1/1 Running 0 6h -csi-provisioner-0 1/1 Running 0 6h -engine-image-ei-57b85e25-8v65d 1/1 Running 0 7d -engine-image-ei-57b85e25-gjjs6 1/1 Running 0 7d -engine-image-ei-57b85e25-t2787 1/1 Running 0 7d -longhorn-csi-plugin-4cpk2 2/2 Running 0 6h -longhorn-csi-plugin-ll6mq 2/2 Running 0 6h -longhorn-csi-plugin-smlsh 2/2 Running 0 6h -longhorn-driver-deployer-7b5bdcccc8-fbncl 1/1 Running 0 6h -longhorn-manager-7x8x8 1/1 Running 0 6h -longhorn-manager-8kqf4 1/1 Running 0 6h -longhorn-manager-kln4h 1/1 Running 0 6h -longhorn-ui-f849dcd85-cgkgg 1/1 Running 0 5d -``` -Or with Flexvolume driver -``` -# kubectl -n longhorn-system get pod -NAME READY STATUS RESTARTS AGE -engine-image-ei-57b85e25-8v65d 1/1 Running 0 7d -engine-image-ei-57b85e25-gjjs6 1/1 Running 0 7d -engine-image-ei-57b85e25-t2787 1/1 Running 0 7d -longhorn-driver-deployer-5469b87b9c-b9gm7 1/1 Running 0 2h -longhorn-flexvolume-driver-lth5g 1/1 Running 0 2h -longhorn-flexvolume-driver-tpqf7 1/1 Running 0 2h -longhorn-flexvolume-driver-v9mrj 1/1 Running 0 2h -longhorn-manager-7x8x8 1/1 Running 0 9h -longhorn-manager-8kqf4 1/1 Running 0 9h -longhorn-manager-kln4h 1/1 Running 0 9h -longhorn-ui-f849dcd85-cgkgg 1/1 Running 0 5d -``` - -### 3. Upgrade Engine Images and set BackupTarget - -1. Wait until the UI is up. -2. Set the BackupTarget in the setting to the same value as before upgrade. -3. Make all the volumes are all detached. -4. Select all the volumes using batch selection. Click batch operation button - `Upgrade Engine`, choose the only engine image available in the list. It's - the default engine shipped with the manager for this release. -5. Now attach the volume one by one, to see if the volume works correctly. - -## Note - -Upgrade is always tricky. Keep backups for the volumes are critical. - -If you have any issues, please reported it at -https://github.com/rancher/longhorn/issues , with your backup yaml files as well -as manager logs. - From 4a3078a84b04d55899b6d75aceea0963c07ef1c4 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 9 Aug 2018 17:14:42 -0700 Subject: [PATCH 17/44] Update README.md Add instruction on how to find out if host supports MountPropagation. --- README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/README.md b/README.md index 8e14c58..1c18752 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,24 @@ Longhorn can be used in Kubernetes to provide persistent storage through either 1. It's enabled by default in Kubernetes v1.10. But some early versions of RKE may not enable it. 3. If above conditions cannot be met, Longhorn will falls back to use Flexvolume driver. +### Check if your setup satisfied CSI requirement +1. Use the following command to check your Kubernetes server version +``` +> kubectl version +Client Version: version.Info{Major:"1", Minor:"10", GitVersion:"v1.10.3", GitCommit:"2bba0127d85d5a46ab4b778548be28623b32d0b0", GitTreeState:"clean", BuildDate:"2018-05-21T09:17:39Z", GoVersion:"go1.9.3", Compiler:"gc", Platform:"linux/amd64"} +Server Version: version.Info{Major:"1", Minor:"10", GitVersion:"v1.10.1", GitCommit:"d4ab47518836c750f9949b9e0d387f20fb92260b", GitTreeState:"clean", BuildDate:"2018-04-12T14:14:26Z", GoVersion:"go1.9.3", Compiler:"gc", Platform:"linux/amd64"} +``` +The `Server Version` should be `v1.10` or above. + +2. 
Use the following command on the hosts to check if the feature gate is enabled for Mount Propagation +``` +> ps aux|grep kube|grep MountPropagation +root 1707 3.1 12.4 1087008 503848 ? Ssl Jul12 1288:35 kube-apiserver --storage-backend=etcd3 --client-ca-file=/etc/kubernetes/ssl/kube-ca.pem --tls-cert-file=/etc/kubernetes/ssl/kube-apiserver.pem --kubelet-client-certificate=/etc/kubernetes/ssl/kube-apiserver.pem --apiserver-count=1 --secure-port=6443 --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname --kubelet-client-key=/etc/kubernetes/ssl/kube-apiserver-key.pem --tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305 --allow-privileged=true --insecure-port=0 --admission-control=ServiceAccount,NamespaceLifecycle,LimitRanger,PersistentVolumeLabel,DefaultStorageClass,ResourceQuota,DefaultTolerationSeconds --cloud-provider= --service-cluster-ip-range=10.43.0.0/16 --tls-private-key-file=/etc/kubernetes/ssl/kube-apiserver-key.pem --service-account-key-file=/etc/kubernetes/ssl/kube-apiserver-key.pem --authorization-mode=Node,RBAC --bind-address=0.0.0.0 --feature-gates=MountPropagation=true --insecure-bind-address=127.0.0.1 --etcd-cafile=/etc/kubernetes/ssl/kube-ca.pem --etcd-certfile=/etc/kubernetes/ssl/kube-node.pem --etcd-keyfile=/etc/kubernetes/ssl/kube-node-key.pem --etcd-servers=https://138.197.199.191:2379 --etcd-prefix=/registry +root 1760 4.7 6.4 1508564 260724 ? Ssl Jul12 1970:59 kubelet --network-plugin=cni --resolv-conf=/etc/resolv.conf --cluster-domain=cluster.local --v=2 --enforce-node-allocatable= --cgroups-per-qos=True --cni-bin-dir=/opt/cni/bin --cluster-dns=10.43.0.10 --cloud-provider= --fail-swap-on=false --address=0.0.0.0 --cadvisor-port=0 --volume-plugin-dir=/var/lib/kubelet/volumeplugins --hostname-override=yasker-longhorn-dev-1 --client-ca-file=/etc/kubernetes/ssl/kube-ca.pem --root-dir=/var/lib/kubelet --tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305 --feature-gates=MountPropagation=true --cni-conf-dir=/etc/cni/net.d --allow-privileged=true --pod-infra-container-image=rancher/pause-amd64:3.0 --kubeconfig=/etc/kubernetes/ssl/kubecfg-kube-node.yaml --read-only-port=0 --anonymous-auth=false --cgroup-driver=cgroupfs +``` +Both `kube-apiserver` and `kubelet` should have `--feature-gates=MountPropagation=true` + + ### Requirement for the Flexvolume driver 1. Kubernetes v1.8+ From f694739060dbd00d3aafd21da6a34ce5e0bbf853 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 9 Aug 2018 17:15:57 -0700 Subject: [PATCH 18/44] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1c18752..719b8bf 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ Longhorn can be used in Kubernetes to provide persistent storage through either ### Check if your setup satisfied CSI requirement 1. 
Use the following command to check your Kubernetes server version ``` -> kubectl version +# kubectl version Client Version: version.Info{Major:"1", Minor:"10", GitVersion:"v1.10.3", GitCommit:"2bba0127d85d5a46ab4b778548be28623b32d0b0", GitTreeState:"clean", BuildDate:"2018-05-21T09:17:39Z", GoVersion:"go1.9.3", Compiler:"gc", Platform:"linux/amd64"} Server Version: version.Info{Major:"1", Minor:"10", GitVersion:"v1.10.1", GitCommit:"d4ab47518836c750f9949b9e0d387f20fb92260b", GitTreeState:"clean", BuildDate:"2018-04-12T14:14:26Z", GoVersion:"go1.9.3", Compiler:"gc", Platform:"linux/amd64"} ``` @@ -50,7 +50,7 @@ The `Server Version` should be `v1.10` or above. 2. Use the following command on the hosts to check if the feature gate is enabled for Mount Propagation ``` -> ps aux|grep kube|grep MountPropagation +# ps aux|grep kube|grep MountPropagation root 1707 3.1 12.4 1087008 503848 ? Ssl Jul12 1288:35 kube-apiserver --storage-backend=etcd3 --client-ca-file=/etc/kubernetes/ssl/kube-ca.pem --tls-cert-file=/etc/kubernetes/ssl/kube-apiserver.pem --kubelet-client-certificate=/etc/kubernetes/ssl/kube-apiserver.pem --apiserver-count=1 --secure-port=6443 --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname --kubelet-client-key=/etc/kubernetes/ssl/kube-apiserver-key.pem --tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305 --allow-privileged=true --insecure-port=0 --admission-control=ServiceAccount,NamespaceLifecycle,LimitRanger,PersistentVolumeLabel,DefaultStorageClass,ResourceQuota,DefaultTolerationSeconds --cloud-provider= --service-cluster-ip-range=10.43.0.0/16 --tls-private-key-file=/etc/kubernetes/ssl/kube-apiserver-key.pem --service-account-key-file=/etc/kubernetes/ssl/kube-apiserver-key.pem --authorization-mode=Node,RBAC --bind-address=0.0.0.0 --feature-gates=MountPropagation=true --insecure-bind-address=127.0.0.1 --etcd-cafile=/etc/kubernetes/ssl/kube-ca.pem --etcd-certfile=/etc/kubernetes/ssl/kube-node.pem --etcd-keyfile=/etc/kubernetes/ssl/kube-node-key.pem --etcd-servers=https://138.197.199.191:2379 --etcd-prefix=/registry root 1760 4.7 6.4 1508564 260724 ? 
Ssl Jul12 1970:59 kubelet --network-plugin=cni --resolv-conf=/etc/resolv.conf --cluster-domain=cluster.local --v=2 --enforce-node-allocatable= --cgroups-per-qos=True --cni-bin-dir=/opt/cni/bin --cluster-dns=10.43.0.10 --cloud-provider= --fail-swap-on=false --address=0.0.0.0 --cadvisor-port=0 --volume-plugin-dir=/var/lib/kubelet/volumeplugins --hostname-override=yasker-longhorn-dev-1 --client-ca-file=/etc/kubernetes/ssl/kube-ca.pem --root-dir=/var/lib/kubelet --tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305 --feature-gates=MountPropagation=true --cni-conf-dir=/etc/cni/net.d --allow-privileged=true --pod-infra-container-image=rancher/pause-amd64:3.0 --kubeconfig=/etc/kubernetes/ssl/kubecfg-kube-node.yaml --read-only-port=0 --anonymous-auth=false --cgroup-driver=cgroupfs ``` From 01d46606602a4482c20418c411830485a7d6bc3a Mon Sep 17 00:00:00 2001 From: James Oliver Date: Thu, 9 Aug 2018 17:49:42 -0700 Subject: [PATCH 19/44] Document base image --- README.md | 1 + docs/base-image.md | 250 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 251 insertions(+) create mode 100644 docs/base-image.md diff --git a/README.md b/README.md index 719b8bf..315ad44 100644 --- a/README.md +++ b/README.md @@ -254,6 +254,7 @@ User can find the setting for the recurring snapshot and backup in the `Volume D ### [Google Kubernetes Engine](./docs/gke.md) ### [Troubleshotting](./docs/troubleshooting.md) ### [Restoring Stateful Set volumes](./docs/restore_statefulset.md) +### [Base Image support](./docs/base-image.md) ## Uninstall Longhorn diff --git a/docs/base-image.md b/docs/base-image.md new file mode 100644 index 0000000..5c9f68e --- /dev/null +++ b/docs/base-image.md @@ -0,0 +1,250 @@ +# Base Image Support + +Longhorn supports creation of block devices backed by a base image. Longhorn +base images are packaged as Docker images. Public or private registries may +be used as a distribution mechanism for your Docker base images. + +## Usage + +Volumes backed by a base image can be created in three ways. + +1. [UI](#ui) - Create Longhorn volumes exposed as block device or iSCSI target +2. [FlexVolume Driver](#flexvolume-driver) - Create Longhorn block devices and consume in Kubernetes pods +3. [CSI Driver](#csi-driver) - (Newer) Create Longhorn block devices and consume in Kubernetes pods + +### UI + +On the `Volume` tab, click the `Create Volume` button. The `Base Image` field +expects a Docker image name such as `rancher/vm-ubuntu:16.04.4-server-amd64`. + +### FlexVolume Driver + +The flexvolume driver supports volumes backed by base image. Below is a sample +FlexVolume definition including `baseImage` option. + +``` +name: flexvol +flexVolume: + driver: "rancher.io/longhorn" + fsType: "ext4" + options: + size: "32Mi" + numberOfReplicas: "3" + staleReplicaTimeout: "20" + fromBackup: "" + baseImage: "rancher/longhorn-test:baseimage-ext4" +``` + +You do not need to (and probably shouldn't) explicitly set filesystem type +`fsType` when base image is present. If you do, it must match the base image's +filesystem or the flexvolume driver will return an error. + +Try it out for yourself. Make sure the Longhorn driver deployer specifies flag +`--driver flexvolume`, otherwise a different driver may be deployed. 
The +following example creates an nginx pod serving content from a flexvolume with +a base image and is accessible from a service. + +``` +kubectl create -f https://raw.githubusercontent.com/rancher/longhorn-manager/master/examples/flexvolume/example_baseimage.yaml +``` + +Wait until the pod is running. + +``` +kubectl get po/flexvol-baseimage -w +``` + +Query for the service you created. + +``` +kubectl get svc/flexvol-baseimage +``` + +Your service should look similar. + +``` +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +service/flexvol-baseimage LoadBalancer 10.43.153.186 80:31028/TCP 2m +``` + +Now let's access something packaged inside the base image through the Nginx +webserver, exposed by the `LoadBalancer` service. If you have LoadBalancer +support and `EXTERNAL-IP` is set, navigate to the following URL. + +``` +http:///guests/hd/party-wizard.gif +``` + +Otherwise, navigate to the following URL where `NODE-IP` is the external IP +address of any Kubernetes node and `NODE-PORT` is the second port in the +service (`31028` in the example service above). + +``` +http://:/guests/hd/party-wizard.gif +``` + +Finally, tear down the pod and service. + +``` +kubectl delete -f https://raw.githubusercontent.com/rancher/longhorn-manager/master/examples/flexvolume/example_baseimage.yaml +``` + +### CSI Driver + +The CSI driver supports volumes backed by base image. Below is a sample +StorageClass definition including `baseImage` option. + +``` +kind: StorageClass +apiVersion: storage.k8s.io/v1 +metadata: + name: example +provisioner: rancher.io/longhorn +parameters: + numberOfReplicas: '3' + staleReplicaTimeout: '30' + fromBackup: '' + baseImage: rancher/longhorn-test:baseimage-ext4 +``` + +Let's walk through an example. First, ensure the CSI Plugin is deployed. + +``` +kubectl -n longhorn-system get daemonset.apps/longhorn-csi-plugin +``` + +The following example creates an nginx statefulset with two replicas serving +content from two csi-provisioned volumes backed by a base image. The +statefulset is accessible from a service. + +``` +kubectl create -f https://raw.githubusercontent.com/rancher/longhorn-manager/master/examples/csi/example_baseimage.yaml +``` + +Wait until both pods are running. + +``` +kubectl -l app=csi-baseimage get po -w +``` + +Query for the service you created. + +``` +kubectl get svc/csi-baseimage +``` + +Your service should look similar. + +``` +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +csi-baseimage LoadBalancer 10.43.47.129 80:32768/TCP 4m +``` + +Now let's access something packaged inside the base image through the Nginx +webserver, exposed by the `LoadBalancer` service. If you have LoadBalancer +support and `EXTERNAL-IP` is set, navigate to the following URL. + +``` +http:///guests/hd/party-wizard.gif +``` + +Otherwise, navigate to the following URL where `NODE-IP` is the external IP +address of any Kubernetes node and `NODE-PORT` is the second port in the +service (`32768` in the example service above). + +``` +http://:/guests/hd/party-wizard.gif +``` + +Finally, tear down the pod and service. + +``` +kubectl delete -f https://raw.githubusercontent.com/rancher/longhorn-manager/master/examples/csi/example_baseimage.yaml +``` + +## Building + +Creating and packaging an empty base image is a very simple process. + +1. [Install QEMU](https://en.wikibooks.org/wiki/QEMU/Installing_QEMU). +2. Create a qcow2 image. + +``` +qemu-img create -f qcow2 example.qcow2 4G +``` + +3. 
Create the `Dockerfile` file with the following contents: + +``` +FROM busybox +COPY example.qcow2 /base_image/example.qcow2 +``` + +4. Build and publish the image: + +``` +DOCKERHUB_ACCT=rancher +docker build -t ${DOCKERHUB_ACCT}/longhorn-example:baseimage . +docker push ${DOCKERHUB_ACCT}/longhorn-example:baseimage +``` + +That's it! Your (empty) base image is ready for (no) use. Let's now explore +some use cases for a base image and what we should do to our `example.qcow2` +before building and publishing. + +### Simple Filesystem + +Suppose we want to store some static web assets in a volume. We have our qcow2 +image and the web assets, but how to put the assets in the image? + +On a Linux machine, load the network block device module. + +``` +sudo modprobe nbd +``` + +Use `qemu-nbd` to expose the image as a network block device. + +``` +sudo qemu-nbd -f qcow2 -c /dev/nbd0 example.qcow2 +``` + +The raw block device needs a filesystem. Consider your infrastructure and +choose an appropriate filesystem. We will use EXT4 filesystem. + +``` +sudo mkfs -t ext4 /dev/nbd0 +``` + +Mount the filesystem. + +``` +mkdir -p example +sudo mount /dev/nbd0 example +``` + +Copy web assets to filesystem. + +``` +cp /web/assets/* example/ +``` + +Unmount the filesystem, shutdown `qemu-nbd`, cleanup. + +``` +sudo umount example +sudo killall qemu-nbd +rmdir example +``` + +Optionally, compress the image. + +``` +qemu-img convert -c -O qcow2 example.qcow2 example.compressed.qcow2 +``` + +Follow the build and publish image steps and you are done. [Example script](https://raw.githubusercontent.com/rancher/longhorn-tests/master/manager/test_containers/baseimage/generate.sh). + +### Virtual Machine + +See [this document](https://github.com/rancher/vm/blob/master/docs/images.md) for the basic procedure of preparing Virtual Machine images. From dbb54b4588c51703738b2feb0a3fd363ea773222 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 9 Aug 2018 19:30:42 -0700 Subject: [PATCH 20/44] Revert "docs: Remove Upgrade instruction for now" This reverts commit ac838a35a96a2059fb2dfcd91e450d8511f4833f. Adding back the doc. We're going to release when it's ready. --- README.md | 1 + docs/upgrade.md | 100 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 docs/upgrade.md diff --git a/README.md b/README.md index 315ad44..bc2625f 100644 --- a/README.md +++ b/README.md @@ -249,6 +249,7 @@ User can find the setting for the recurring snapshot and backup in the `Volume D ## Other topics +### [Upgrade from v0.2](./docs/upgrade.md) ### [Multiple disks support](./docs/multidisk.md) ### [iSCSI support](./docs/iscsi.md) ### [Google Kubernetes Engine](./docs/gke.md) diff --git a/docs/upgrade.md b/docs/upgrade.md new file mode 100644 index 0000000..8c83a71 --- /dev/null +++ b/docs/upgrade.md @@ -0,0 +1,100 @@ +# Upgrade + +Here we would cover how to upgrade from Longhorn v0.2 to Longhorn v0.3 release. + +## Backup your existing data +1. It's recommended to create a latest backup for every volume to the backupstore before upgrade. +2. Make sure no volume is in degraded or faulted state. +3. Shutdown related Kubernetes pods. Detach all the volumes. Make sure all the volumes are detached before proceeding. +4. 
Backup CRD yaml to local directory: +``` +kubectl -n longhorn-system get volumes.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-volumes.yaml +kubectl -n longhorn-system get engines.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-engines.yaml +kubectl -n longhorn-system get replicas.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-replicas.yaml +kubectl -n longhorn-system get settings.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-settings.yaml +``` +5. Noted the value of BackupTarget in the setting. The user would need to reset after upgrade. + +## Upgrade from v0.2 to v0.3 + +Please be aware that the upgrade will incur API downtime. + +### 1. Remove the old manager +``` +kubectl delete -f https://raw.githubusercontent.com/rancher/longhorn/v0.2/deploy/uninstall-for-upgrade.yaml +``` + +### 2. Install the new manager + +We will use `kubectl apply` instead of `kubectl create` to install the new version of the manager. + +If you're using Rancher RKE, or other distro with Kubernetes v1.10+ and Mount Propagation enabled, you can just do: +``` +kubectl apply -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml +``` +If you're using Flexvolume driver with other Kubernetes Distro, replace the value of $FLEXVOLUME_DIR in the following command with your own Flexvolume Directory as specified above. +``` +FLEXVOLUME_DIR="/home/kubernetes/flexvolume/" +curl -s https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml|sed "s#^\( *\)value: \"/var/lib/kubelet/volumeplugins\"#\1value: \"${FLEXVOLUME_DIR}\"#g" > longhorn.yaml +kubectl apply -f longhorn.yaml +``` + +For Google Kubernetes Engine (GKE) users, see [here](./gke.md) before proceed. + +Longhorn Manager and Longhorn Driver will be deployed as daemonsets in a separate namespace called `longhorn-system`, as you can see in the yaml file. + +When you see those pods has started correctly as follows, you've deployed the Longhorn successfully. + +Deployed with CSI driver: +``` +# kubectl -n longhorn-system get pod +NAME READY STATUS RESTARTS AGE +csi-attacher-0 1/1 Running 0 6h +csi-provisioner-0 1/1 Running 0 6h +engine-image-ei-57b85e25-8v65d 1/1 Running 0 7d +engine-image-ei-57b85e25-gjjs6 1/1 Running 0 7d +engine-image-ei-57b85e25-t2787 1/1 Running 0 7d +longhorn-csi-plugin-4cpk2 2/2 Running 0 6h +longhorn-csi-plugin-ll6mq 2/2 Running 0 6h +longhorn-csi-plugin-smlsh 2/2 Running 0 6h +longhorn-driver-deployer-7b5bdcccc8-fbncl 1/1 Running 0 6h +longhorn-manager-7x8x8 1/1 Running 0 6h +longhorn-manager-8kqf4 1/1 Running 0 6h +longhorn-manager-kln4h 1/1 Running 0 6h +longhorn-ui-f849dcd85-cgkgg 1/1 Running 0 5d +``` +Or with Flexvolume driver +``` +# kubectl -n longhorn-system get pod +NAME READY STATUS RESTARTS AGE +engine-image-ei-57b85e25-8v65d 1/1 Running 0 7d +engine-image-ei-57b85e25-gjjs6 1/1 Running 0 7d +engine-image-ei-57b85e25-t2787 1/1 Running 0 7d +longhorn-driver-deployer-5469b87b9c-b9gm7 1/1 Running 0 2h +longhorn-flexvolume-driver-lth5g 1/1 Running 0 2h +longhorn-flexvolume-driver-tpqf7 1/1 Running 0 2h +longhorn-flexvolume-driver-v9mrj 1/1 Running 0 2h +longhorn-manager-7x8x8 1/1 Running 0 9h +longhorn-manager-8kqf4 1/1 Running 0 9h +longhorn-manager-kln4h 1/1 Running 0 9h +longhorn-ui-f849dcd85-cgkgg 1/1 Running 0 5d +``` + +### 3. Upgrade Engine Images and set BackupTarget + +1. Wait until the UI is up. +2. Set the BackupTarget in the setting to the same value as before upgrade. +3. Make all the volumes are all detached. +4. Select all the volumes using batch selection. 
Click batch operation button + `Upgrade Engine`, choose the only engine image available in the list. It's + the default engine shipped with the manager for this release. +5. Now attach the volume one by one, to see if the volume works correctly. + +## Note + +Upgrade is always tricky. Keep backups for the volumes are critical. + +If you have any issues, please reported it at +https://github.com/rancher/longhorn/issues , with your backup yaml files as well +as manager logs. + From f3ccbe7b28ad36a2a7e0a7a3244d49fb02df94aa Mon Sep 17 00:00:00 2001 From: James Oliver Date: Thu, 9 Aug 2018 19:02:01 -0700 Subject: [PATCH 21/44] fix link and selector --- docs/base-image.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/base-image.md b/docs/base-image.md index 5c9f68e..3fe97ec 100644 --- a/docs/base-image.md +++ b/docs/base-image.md @@ -45,7 +45,7 @@ following example creates an nginx pod serving content from a flexvolume with a base image and is accessible from a service. ``` -kubectl create -f https://raw.githubusercontent.com/rancher/longhorn-manager/master/examples/flexvolume/example_baseimage.yaml +kubectl create -f https://raw.githubusercontent.com/rancher/longhorn-manager/v0.3-rc/examples/flexvolume/example_baseimage.yaml ``` Wait until the pod is running. @@ -86,7 +86,7 @@ http://:/guests/hd/party-wizard.gif Finally, tear down the pod and service. ``` -kubectl delete -f https://raw.githubusercontent.com/rancher/longhorn-manager/master/examples/flexvolume/example_baseimage.yaml +kubectl delete -f https://raw.githubusercontent.com/rancher/longhorn-manager/v0.3-rc/examples/flexvolume/example_baseimage.yaml ``` ### CSI Driver @@ -118,13 +118,13 @@ content from two csi-provisioned volumes backed by a base image. The statefulset is accessible from a service. ``` -kubectl create -f https://raw.githubusercontent.com/rancher/longhorn-manager/master/examples/csi/example_baseimage.yaml +kubectl create -f https://raw.githubusercontent.com/rancher/longhorn-manager/v0.3-rc/examples/provisioner_with_baseimage.yaml ``` Wait until both pods are running. ``` -kubectl -l app=csi-baseimage get po -w +kubectl -l app=provisioner-baseimage get po -w ``` Query for the service you created. @@ -159,7 +159,7 @@ http://:/guests/hd/party-wizard.gif Finally, tear down the pod and service. 
``` -kubectl delete -f https://raw.githubusercontent.com/rancher/longhorn-manager/master/examples/csi/example_baseimage.yaml +kubectl delete -f https://raw.githubusercontent.com/rancher/longhorn-manager/v0.3-rc/examples/provisioner_with_baseimage.yaml ``` ## Building From 64562f16b19dd2fde9ba6d8dc5d1f6ca03ce06f0 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Fri, 10 Aug 2018 22:07:05 -0700 Subject: [PATCH 22/44] Sync with Longhorn Manager Manager commit: commit 807666ba7a13024a218361d963b6c4433b7b45fd Author: Sheng Yang Date: Fri Aug 10 21:57:07 2018 -0700 Update image to rancher/longhorn-manager:v0.3-rc4 Engine image: rancher/longhorn-engine:v0.3-rc4 UI Image: rancher/longhorn-ui:v0.3-rc4 --- deploy/longhorn.yaml | 14 ++--- examples/flexvolume/example_baseimage.yaml | 25 +++++++-- examples/provisioner_with_baseimage.yaml | 63 ++++++++++++++++++++++ 3 files changed, 91 insertions(+), 11 deletions(-) create mode 100644 examples/provisioner_with_baseimage.yaml diff --git a/deploy/longhorn.yaml b/deploy/longhorn.yaml index 54b82d2..d2b12ae 100644 --- a/deploy/longhorn.yaml +++ b/deploy/longhorn.yaml @@ -178,7 +178,7 @@ spec: spec: containers: - name: longhorn-manager - image: rancher/longhorn-manager:9838a5f + image: rancher/longhorn-manager:v0.3-rc4 imagePullPolicy: Always securityContext: privileged: true @@ -187,9 +187,9 @@ spec: - -d - daemon - --engine-image - - rancher/longhorn-engine:e58683a + - rancher/longhorn-engine:v0.3-rc4 - --manager-image - - rancher/longhorn-manager:9838a5f + - rancher/longhorn-manager:v0.3-rc4 - --service-account - longhorn-service-account ports: @@ -266,7 +266,7 @@ spec: spec: containers: - name: longhorn-ui - image: rancher/longhorn-ui:99252cc + image: rancher/longhorn-ui:v0.3-rc4 ports: - containerPort: 8000 env: @@ -305,18 +305,18 @@ spec: spec: initContainers: - name: wait-longhorn-manager - image: rancher/longhorn-manager:9838a5f + image: rancher/longhorn-manager:v0.3-rc4 command: ['sh', '-c', 'while [ $(curl -m 1 -s -o /dev/null -w "%{http_code}" http://longhorn-backend:9500/v1) != "200" ]; do echo waiting; sleep 2; done'] containers: - name: longhorn-driver-deployer - image: rancher/longhorn-manager:9838a5f + image: rancher/longhorn-manager:v0.3-rc4 imagePullPolicy: Always command: - longhorn-manager - -d - deploy-driver - --manager-image - - rancher/longhorn-manager:9838a5f + - rancher/longhorn-manager:v0.3-rc4 - --manager-url - http://longhorn-backend:9500/v1 # manually choose "flexvolume" or "csi" diff --git a/examples/flexvolume/example_baseimage.yaml b/examples/flexvolume/example_baseimage.yaml index 2d25980..640d561 100644 --- a/examples/flexvolume/example_baseimage.yaml +++ b/examples/flexvolume/example_baseimage.yaml @@ -1,6 +1,8 @@ apiVersion: v1 kind: Pod metadata: + labels: + app: flexvol-baseimage name: flexvol-baseimage namespace: default spec: @@ -16,11 +18,26 @@ spec: volumes: - name: flexvol flexVolume: - driver: "rancher.io/longhorn" - # fsType: "iso9660" + driver: rancher.io/longhorn options: - size: "16Mi" + size: 32Mi numberOfReplicas: "3" staleReplicaTimeout: "20" fromBackup: "" - baseImage: "rancher/longhorn-test-baseimage" + baseImage: rancher/longhorn-test:baseimage-ext4 +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app: flexvol-baseimage + name: flexvol-baseimage + namespace: default +spec: + ports: + - name: web + port: 80 + targetPort: 80 + selector: + app: flexvol-baseimage + type: LoadBalancer diff --git a/examples/provisioner_with_baseimage.yaml b/examples/provisioner_with_baseimage.yaml new file mode 100644 
index 0000000..e3ab42b
--- /dev/null
+++ b/examples/provisioner_with_baseimage.yaml
@@ -0,0 +1,63 @@
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+  labels:
+    app: provisioner-baseimage
+  name: baseimage-storageclass
+provisioner: rancher.io/longhorn
+parameters:
+  numberOfReplicas: '3'
+  staleReplicaTimeout: '30'
+  fromBackup: ''
+  baseImage: rancher/longhorn-test:baseimage-ext4
+---
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app: provisioner-baseimage
+  name: provisioner-baseimage-service
+spec:
+  ports:
+  - port: 80
+    name: web
+  selector:
+    app: provisioner-baseimage
+  type: LoadBalancer
+---
+apiVersion: apps/v1beta2
+kind: StatefulSet
+metadata:
+  labels:
+    app: provisioner-baseimage
+  name: provisioner-baseimage-statefulset
+spec:
+  selector:
+    matchLabels:
+      app: provisioner-baseimage
+  serviceName: provisioner-baseimage
+  replicas: 2
+  template:
+    metadata:
+      labels:
+        app: provisioner-baseimage
+    spec:
+      terminationGracePeriodSeconds: 10
+      containers:
+      - name: nginx
+        image: nginx:stable-alpine
+        imagePullPolicy: IfNotPresent
+        volumeMounts:
+        - name: baseimage-vol
+          mountPath: /usr/share/nginx/html
+        ports:
+        - containerPort: 80
+  volumeClaimTemplates:
+  - metadata:
+      name: baseimage-vol
+    spec:
+      accessModes: [ "ReadWriteOnce" ]
+      storageClassName: baseimage-storageclass
+      resources:
+        requests:
+          storage: 32Mi

From eb65b2f9616b5c5eb339ac79233cf62a594467be Mon Sep 17 00:00:00 2001
From: JacieChao
Date: Wed, 15 Aug 2018 10:33:05 +0800
Subject: [PATCH 23/44] Update default value of StorageReserved value of root
 disk

---
 docs/multidisk.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/multidisk.md b/docs/multidisk.md
index 0221f50..ce1f569 100644
--- a/docs/multidisk.md
+++ b/docs/multidisk.md
@@ -11,7 +11,9 @@ To add any additional disks, the user needs to:
 
 Longhorn will detect the storage information (e.g. maximum space, available space) about the disk automatically, and start scheduling to it if it's possible to accommodate the volume there. A path mounted by an existing disk won't be allowed.
 
-The user can reserve a certain amount of space on the disk to stop Longhorn from using it. It can be set in the `Space Reserved` field for the disk. This is useful for non-dedicated storage disks on the node.
+The user can reserve a certain amount of space on the disk to stop Longhorn from using it. It can be set in the `Space Reserved` field for the disk. This is useful for non-dedicated storage disks on the node.
+
+The kubelet needs to preserve node stability when available compute resources are low. This is especially important when dealing with incompressible compute resources, such as memory or disk space. If such resources are exhausted, nodes become unstable. To avoid the kubelet `Disk pressure` issue after scheduling several volumes, Longhorn reserves 30% of the root disk space (`/var/lib/rancher/longhorn`) by default to ensure node stability.
 
 Nodes and disks can be excluded from future scheduling. Note that any scheduled storage space won't be released automatically if scheduling is disabled for the node. 
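
To make the reserved-space and over-provisioning settings discussed in the patch above more concrete, here is a small back-of-the-envelope sketch. The numbers are purely hypothetical (a 200 GiB disk, the default 30% root-disk reservation, and the default `StorageOverProvisioningPercentage` of 500); it only illustrates the arithmetic described in the multidisk documentation, not an actual Longhorn command.

```
# Hypothetical numbers for illustration only.
MAXIMUM_STORAGE=200                                 # GiB, total size of the root disk
RESERVED_STORAGE=$((MAXIMUM_STORAGE * 30 / 100))    # default 30% kept for the node itself

# Upper bound of ScheduledStorage with StorageOverProvisioningPercentage=500.
OVER_PROVISIONING_PERCENTAGE=500
SCHEDULABLE=$(((MAXIMUM_STORAGE - RESERVED_STORAGE) * OVER_PROVISIONING_PERCENTAGE / 100))

echo "Reserved: ${RESERVED_STORAGE} GiB"              # Reserved: 60 GiB
echo "Schedulable volume total: ${SCHEDULABLE} GiB"   # Schedulable volume total: 700 GiB
```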
From bb1ac25afd6cca7f8a3e91afa7754bf533fee09a Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 21 Aug 2018 16:08:30 -0700 Subject: [PATCH 24/44] Sync with Longhorn Manager Manager commit: commit 266f566783e47ce5e351f98491ea5b7b86a875d2 Author: Sheng Yang Date: Tue Aug 21 11:48:22 2018 -0700 Update images Manager: rancher/longhorn-manager:2c17d9e UI: rancher/longhorn-ui:72303d9 --- deploy/longhorn.yaml | 12 ++-- scripts/environment_check.sh | 107 +++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 6 deletions(-) create mode 100755 scripts/environment_check.sh diff --git a/deploy/longhorn.yaml b/deploy/longhorn.yaml index d2b12ae..58f6ce7 100644 --- a/deploy/longhorn.yaml +++ b/deploy/longhorn.yaml @@ -178,7 +178,7 @@ spec: spec: containers: - name: longhorn-manager - image: rancher/longhorn-manager:v0.3-rc4 + image: rancher/longhorn-manager:2c17d9e imagePullPolicy: Always securityContext: privileged: true @@ -189,7 +189,7 @@ spec: - --engine-image - rancher/longhorn-engine:v0.3-rc4 - --manager-image - - rancher/longhorn-manager:v0.3-rc4 + - rancher/longhorn-manager:2c17d9e - --service-account - longhorn-service-account ports: @@ -266,7 +266,7 @@ spec: spec: containers: - name: longhorn-ui - image: rancher/longhorn-ui:v0.3-rc4 + image: rancher/longhorn-ui:72303d9 ports: - containerPort: 8000 env: @@ -305,18 +305,18 @@ spec: spec: initContainers: - name: wait-longhorn-manager - image: rancher/longhorn-manager:v0.3-rc4 + image: rancher/longhorn-manager:2c17d9e command: ['sh', '-c', 'while [ $(curl -m 1 -s -o /dev/null -w "%{http_code}" http://longhorn-backend:9500/v1) != "200" ]; do echo waiting; sleep 2; done'] containers: - name: longhorn-driver-deployer - image: rancher/longhorn-manager:v0.3-rc4 + image: rancher/longhorn-manager:2c17d9e imagePullPolicy: Always command: - longhorn-manager - -d - deploy-driver - --manager-image - - rancher/longhorn-manager:v0.3-rc4 + - rancher/longhorn-manager:2c17d9e - --manager-url - http://longhorn-backend:9500/v1 # manually choose "flexvolume" or "csi" diff --git a/scripts/environment_check.sh b/scripts/environment_check.sh new file mode 100755 index 0000000..6ba4934 --- /dev/null +++ b/scripts/environment_check.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +dependencies() { + local targets=($@) + local allFound=true + for ((i=0; i<${#targets[@]}; i++)); do + local target=${targets[$i]} + if [ "$(which $target)" == "" ]; then + allFound=false + echo Not found: $target + fi + done + if [ "$allFound" == "false" ]; then + echo "Please install missing dependencies." 
+ exit 2 + fi +} + +create_ds() { +cat < $TEMP_DIR/environment_check.yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + app: longhorn-environment-check + name: longhorn-environment-check +spec: + selector: + matchLabels: + app: longhorn-environment-check + template: + metadata: + labels: + app: longhorn-environment-check + spec: + containers: + - name: longhorn-environment-check + image: busybox + args: ["/bin/sh", "-c", "sleep 1000000000"] + volumeMounts: + - name: mountpoint + mountPath: /mnt/tmp + mountPropagation: Bidirectional + securityContext: + privileged: true + volumes: + - name: mountpoint + hostPath: + path: /mnt/tmp +EOF + kubectl create -f $TEMP_DIR/environment_check.yaml +} + +cleanup() { + kubectl delete -f $TEMP_DIR/environment_check.yaml + rm -rf $TEMP_DIR +} + +wait_ds_ready() { + while true; do + local ds=$(kubectl get ds/longhorn-environment-check -o json) + local numberReady=$(echo $ds | jq .status.numberReady) + local desiredNumberScheduled=$(echo $ds | jq .status.desiredNumberScheduled) + + if [ "$desiredNumberScheduled" == "$numberReady" ] && [ "$desiredNumberScheduled" != "0" ]; then + echo "all pods ready ($numberReady/$desiredNumberScheduled)" + return + fi + + echo "waiting for pods to become ready ($numberReady/$desiredNumberScheduled)" + sleep 3 + done +} + +validate_pods() { + local allSupported=true + local pods=$(kubectl -l app=longhorn-environment-check get po -o json) + + for ((i=0; i<1; i++)); do + local pod=$(echo $pods | jq .items[$i]) + local nodeName=$(echo $pod | jq -r .spec.nodeName) + local mountPropagation=$(echo $pod | jq -r '.spec.containers[0].volumeMounts[] | select(.name=="mountpoint") | .mountPropagation') + + if [ "$mountPropagation" != "Bidirectional" ]; then + allSupported=false + echo "node $nodeName: MountPropagation DISABLED" + fi + done + + if [ "$allSupported" != "true" ]; then + echo + echo " MountPropagation is disabled on at least one node." + echo " As a result, CSI Driver and Base Image aren't supported." + echo + exit 1 + else + echo -e "\n MountPropagation is enabled!\n" + fi +} + +dependencies kubectl jq mktemp +TEMP_DIR=$(mktemp -d) +trap cleanup EXIT +create_ds +wait_ds_ready +validate_pods +exit 0 From 0dc32d7676dc34a50c5116c6f71a881ad9028efa Mon Sep 17 00:00:00 2001 From: James Oliver Date: Fri, 17 Aug 2018 17:19:22 -0700 Subject: [PATCH 25/44] Document upgrade path from v0.1, v0.2 to v0.3 --- README.md | 14 ++- docs/upgrade.md | 246 +++++++++++++++++++++++++++++++++--------------- 2 files changed, 180 insertions(+), 80 deletions(-) diff --git a/README.md b/README.md index bc2625f..10d7ab9 100644 --- a/README.md +++ b/README.md @@ -66,19 +66,23 @@ Both `kube-apiserver` and `kubelet` should have `--feature-gates=MountPropagatio 2. Google GKE: `/home/kubernetes/flexvolume` 3. For other distro, please find the correct directory by running `ps aux|grep kubelet` on the host and check the `--volume-plugin-dir` parameter. If there is none, it would be the default value `/usr/libexec/kubernetes/kubelet-plugins/volume/exec/` . +# Upgrading + +For instructions on how to upgrade Longhorn v0.1 or v0.2 to v0.3, [see this document](docs/upgrade.md#upgrade). + # Deployment Create the deployment of Longhorn in your Kubernetes cluster is easy. 
If you're using Rancher RKE, or other distro with Kubernetes v1.10+ and Mount Propagation enabled, you can just do: ``` -kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml +kubectl apply -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml ``` If you're using Flexvolume driver with other Kubernetes Distro, replace the value of $FLEXVOLUME_DIR in the following command with your own Flexvolume Directory as specified above. ``` FLEXVOLUME_DIR="/home/kubernetes/flexvolume/" curl -s https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml|sed "s#^\( *\)value: \"/var/lib/kubelet/volumeplugins\"#\1value: \"${FLEXVOLUME_DIR}\"#g" > longhorn.yaml -kubectl create -f longhorn.yaml +kubectl apply -f longhorn.yaml ``` For Google Kubernetes Engine (GKE) users, see [here](#google-kubernetes-engine) before proceed. @@ -145,12 +149,12 @@ Longhorn provides persistent volume directly to Kubernetes through one of the Lo Use following command to create a default Longhorn StorageClass named `longhorn`. ``` -kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/examples/storageclass.yaml +kubectl apply -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/examples/storageclass.yaml ``` Now you can create a pod using Longhorn like this: ``` -kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/examples/pvc.yaml +kubectl apply -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/examples/pvc.yaml ``` The yaml contains two parts: @@ -214,7 +218,7 @@ We provides two testing purpose backupstore based on NFS server and Minio S3 ser Use following command to setup a Minio S3 server for BackupStore after `longhorn-system` was created. ``` -kubectl create -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/backupstores/minio-backupstore.yaml +kubectl apply -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/backupstores/minio-backupstore.yaml ``` Now set `Settings/General/BackupTarget` to diff --git a/docs/upgrade.md b/docs/upgrade.md index 8c83a71..b7cd103 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -1,100 +1,196 @@ # Upgrade -Here we would cover how to upgrade from Longhorn v0.2 to Longhorn v0.3 release. +Here we cover how to upgrade to Longhorn v0.3 from all previous releases. -## Backup your existing data -1. It's recommended to create a latest backup for every volume to the backupstore before upgrade. -2. Make sure no volume is in degraded or faulted state. -3. Shutdown related Kubernetes pods. Detach all the volumes. Make sure all the volumes are detached before proceeding. -4. Backup CRD yaml to local directory: +## Backup Existing Volumes + +It's recommended to create a recent backup of every volume to the backupstore +before upgrade. + +Create an on-cluster backupstore if you haven't already. We'll use NFS in this +example. ``` -kubectl -n longhorn-system get volumes.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-volumes.yaml -kubectl -n longhorn-system get engines.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-engines.yaml -kubectl -n longhorn-system get replicas.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-replicas.yaml -kubectl -n longhorn-system get settings.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-settings.yaml +kubectl apply -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/backupstores/nfs-backupstore.yaml ``` -5. 
Noted the value of BackupTarget in the setting. The user would need to reset after upgrade. -## Upgrade from v0.2 to v0.3 +On Settings page, set Backup Target to +`nfs://longhorn-test-nfs-svc.default:/opt/backupstore` and click `Save`. -Please be aware that the upgrade will incur API downtime. +Navigate to each volume detail page and click `Take Snapshot`. Click the new +snapshot and click `Backup`. -### 1. Remove the old manager +## Check For Issues + +Make sure no volume is in degraded or faulted state. Wait for degraded +volumes to heal and delete/restore faulted volumes before proceeding. + +## Detach Volumes + +Shutdown all Kubernetes Pods using Longhorn volumes in order to detach the +volumes. The easiest way to achieve this is by deleting all workloads. If +this is not desirable, some workloads may be suspended. We will cover how +each workload can be modified to shut down its pods. + +### CronJob +Edit the cronjob with `kubectl edit cronjob/`. +Set `.spec.suspend` to `true`. +Wait for any currently executing jobs to complete, or terminate them by +deleting relevant pods. + +### DaemonSet +Delete the daemonset with `kubectl delete ds/`. +There is no way to suspend this workload. + +### Deployment +Edit the deployment with `kubectl edit deploy/`. +Set `.spec.replicas` to `0`. + +### Job +Consider allowing the single-run job to complete. +Otherwise, delete the job with `kubectl delete job/`. + +### Pod +Delete the pod with `kubectl delete pod/`. +There is no way to suspend a pod not managed by a workload controller. + +### ReplicaSet +Edit the replicaset with `kubectl edit replicaset/`. +Set `.spec.replicas` to `0`. + +### ReplicationController +Edit the replicationcontroller with `kubectl edit rc/`. +Set `.spec.replicas` to `0`. + +### StatefulSet +Edit the statefulset with `kubectl edit statefulset/`. +Set `.spec.replicas` to `0`. + +Detach all remaining volumes from Longhorn UI. These volumes were most likely +created and attached outside of Kubernetes via Longhorn UI or REST API. + +## Uninstall Old Version + +Make note of `BackupTarget` on the `Setting` page. You will need to manually +set `BackupTarget` after upgrading from either v0.1 or v0.2. + +Delete Longhorn components. + +For Longhorn `v0.1`: +``` +kubectl delete -f https://raw.githubusercontent.com/llparse/longhorn/v0.1/deploy/uninstall-for-upgrade.yaml +``` + +For Longhorn `v0.2`: ``` kubectl delete -f https://raw.githubusercontent.com/rancher/longhorn/v0.2/deploy/uninstall-for-upgrade.yaml ``` -### 2. Install the new manager - -We will use `kubectl apply` instead of `kubectl create` to install the new version of the manager. - -If you're using Rancher RKE, or other distro with Kubernetes v1.10+ and Mount Propagation enabled, you can just do: +If both commands returned `Not found` for all components, Longhorn is probably +deployed in a different namespace. Determine which namespace is in use and +adjust `NAMESPACE` accordingly: ``` -kubectl apply -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml -``` -If you're using Flexvolume driver with other Kubernetes Distro, replace the value of $FLEXVOLUME_DIR in the following command with your own Flexvolume Directory as specified above. 
-``` -FLEXVOLUME_DIR="/home/kubernetes/flexvolume/" -curl -s https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml|sed "s#^\( *\)value: \"/var/lib/kubelet/volumeplugins\"#\1value: \"${FLEXVOLUME_DIR}\"#g" > longhorn.yaml -kubectl apply -f longhorn.yaml +NAMESPACE=longhorn-custom-ns +curl -sSfL https://raw.githubusercontent.com/rancher/longhorn/v0.1/deploy/uninstall-for-upgrade.yaml|sed "s#^\( *\)namespace: longhorn#\1namespace: ${NAMESPACE}#g" > longhorn.yaml +kubectl delete -f longhorn.yaml ``` -For Google Kubernetes Engine (GKE) users, see [here](./gke.md) before proceed. +## Backup Longhorn System -Longhorn Manager and Longhorn Driver will be deployed as daemonsets in a separate namespace called `longhorn-system`, as you can see in the yaml file. +Backup Longhorn CRD yaml to local directory. -When you see those pods has started correctly as follows, you've deployed the Longhorn successfully. - -Deployed with CSI driver: +### v0.1 +Check your backups to make sure Longhorn was running in namespace `longhorn`. ``` -# kubectl -n longhorn-system get pod -NAME READY STATUS RESTARTS AGE -csi-attacher-0 1/1 Running 0 6h -csi-provisioner-0 1/1 Running 0 6h -engine-image-ei-57b85e25-8v65d 1/1 Running 0 7d -engine-image-ei-57b85e25-gjjs6 1/1 Running 0 7d -engine-image-ei-57b85e25-t2787 1/1 Running 0 7d -longhorn-csi-plugin-4cpk2 2/2 Running 0 6h -longhorn-csi-plugin-ll6mq 2/2 Running 0 6h -longhorn-csi-plugin-smlsh 2/2 Running 0 6h -longhorn-driver-deployer-7b5bdcccc8-fbncl 1/1 Running 0 6h -longhorn-manager-7x8x8 1/1 Running 0 6h -longhorn-manager-8kqf4 1/1 Running 0 6h -longhorn-manager-kln4h 1/1 Running 0 6h -longhorn-ui-f849dcd85-cgkgg 1/1 Running 0 5d -``` -Or with Flexvolume driver -``` -# kubectl -n longhorn-system get pod -NAME READY STATUS RESTARTS AGE -engine-image-ei-57b85e25-8v65d 1/1 Running 0 7d -engine-image-ei-57b85e25-gjjs6 1/1 Running 0 7d -engine-image-ei-57b85e25-t2787 1/1 Running 0 7d -longhorn-driver-deployer-5469b87b9c-b9gm7 1/1 Running 0 2h -longhorn-flexvolume-driver-lth5g 1/1 Running 0 2h -longhorn-flexvolume-driver-tpqf7 1/1 Running 0 2h -longhorn-flexvolume-driver-v9mrj 1/1 Running 0 2h -longhorn-manager-7x8x8 1/1 Running 0 9h -longhorn-manager-8kqf4 1/1 Running 0 9h -longhorn-manager-kln4h 1/1 Running 0 9h -longhorn-ui-f849dcd85-cgkgg 1/1 Running 0 5d +NAMESPACE=longhorn +kubectl -n ${NAMESPACE} get volumes.longhorn.rancher.io -o yaml > longhorn-v0.1-backup-volumes.yaml +kubectl -n ${NAMESPACE} get engines.longhorn.rancher.io -o yaml > longhorn-v0.1-backup-engines.yaml +kubectl -n ${NAMESPACE} get replicas.longhorn.rancher.io -o yaml > longhorn-v0.1-backup-replicas.yaml +kubectl -n ${NAMESPACE} get settings.longhorn.rancher.io -o yaml > longhorn-v0.1-backup-settings.yaml ``` -### 3. Upgrade Engine Images and set BackupTarget +### v0.2 +Check your backups to make sure Longhorn was running in namespace +`longhorn-system`. +``` +NAMESPACE=longhorn-system +kubectl -n ${NAMESPACE} get volumes.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-volumes.yaml +kubectl -n ${NAMESPACE} get engines.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-engines.yaml +kubectl -n ${NAMESPACE} get replicas.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-replicas.yaml +kubectl -n ${NAMESPACE} get settings.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-settings.yaml +``` -1. Wait until the UI is up. -2. Set the BackupTarget in the setting to the same value as before upgrade. -3. Make all the volumes are all detached. -4. 
Select all the volumes using batch selection. Click batch operation button - `Upgrade Engine`, choose the only engine image available in the list. It's - the default engine shipped with the manager for this release. -5. Now attach the volume one by one, to see if the volume works correctly. +## Delete CRDs in Different Namespace + +This is only required for Rancher users running Longhorn App `v0.1`. Delete all +CRDs from your namespace which is probably `longhorn`. +``` +NAMESPACE=longhorn +kubectl -n ${NAMESPACE} get volumes.longhorn.rancher.io -o yaml | sed "s/\- longhorn.rancher.io//g" | kubectl apply -f - +kubectl -n ${NAMESPACE} get engines.longhorn.rancher.io -o yaml | sed "s/\- longhorn.rancher.io//g" | kubectl apply -f - +kubectl -n ${NAMESPACE} get replicas.longhorn.rancher.io -o yaml | sed "s/\- longhorn.rancher.io//g" | kubectl apply -f - +kubectl -n ${NAMESPACE} get settings.longhorn.rancher.io -o yaml | sed "s/\- longhorn.rancher.io//g" | kubectl apply -f - +kubectl -n ${NAMESPACE} delete volumes.longhorn.rancher.io --all +kubectl -n ${NAMESPACE} delete engines.longhorn.rancher.io --all +kubectl -n ${NAMESPACE} delete replicas.longhorn.rancher.io --all +kubectl -n ${NAMESPACE} delete settings.longhorn.rancher.io --all +``` + +## Install Longhorn v0.3 + +### Rancher 2.x +For Rancher users who are running Longhorn v0.1, delete the Longhorn App from +`Catalog Apps` screen in Rancher UI. *Do not click the upgrade button.* Launch +Longhorn App template version `0.3.0-rc4`. + +### Other Kubernetes Distro + +For Longhorn v0.2 users who are not using Rancher, follow +[the official Longhorn Deployment instructions](../README.md#deployment). + +## Restore Longhorn System + +This step is only required for Rancher users running Longhorn App `v0.1`. + +``` +NAMESPACE=longhorn-system +sed "s#^\( *\)namespace: .*#\1namespace: ${NAMESPACE}#g" longhorn-v0.1-backup-settings.yaml | kubectl apply -f - +sed "s#^\( *\)namespace: .*#\1namespace: ${NAMESPACE}#g" longhorn-v0.1-backup-replicas.yaml | kubectl apply -f - +sed "s#^\( *\)namespace: .*#\1namespace: ${NAMESPACE}#g" longhorn-v0.1-backup-engines.yaml | kubectl apply -f - +sed "s#^\( *\)namespace: .*#\1namespace: ${NAMESPACE}#g" longhorn-v0.1-backup-volumes.yaml | kubectl apply -f - +``` + +## Access UI and Set BackupTarget + +Wait until the longhorn-ui pod is `Running`: +``` +kubectl -n longhorn-system get pod -w +``` + +[Access the UI](../README.md#access-the-ui). + +On `Setting > General`, set `Backup Target` to the backup target used in +the previous version. In our example, this is +`nfs://longhorn-test-nfs-svc.default:/opt/backupstore`. + +## Upgrade Engine Images + +Ensure all volumes are detached. If any are still attached, detach them now +and wait until they are in `Detached` state. + +Select all the volumes using batch selection. Click batch operation button +`Upgrade Engine`, choose the only engine image available in the list. It's +the default engine shipped with the manager for this release. + +## Attach Volumes + +Now we will resume all workloads by reversing the changes we made to detach +the volumes. Any volume not part of a K8s workload or pod must be attached +manually. ## Note -Upgrade is always tricky. Keep backups for the volumes are critical. - -If you have any issues, please reported it at -https://github.com/rancher/longhorn/issues , with your backup yaml files as well -as manager logs. +Upgrade is always tricky. Keeping recent backups for volumes is critical. 
+If you have any issues, please report it at +https://github.com/rancher/longhorn/issues and include your backup yaml files +as well as manager logs. From 4fe551be8a487ce4e6802a75024a0e1c5050c15d Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 21 Aug 2018 17:27:57 -0700 Subject: [PATCH 26/44] Update upgrade.md --- docs/upgrade.md | 94 +++++++++++++++++++++++++------------------------ 1 file changed, 48 insertions(+), 46 deletions(-) diff --git a/docs/upgrade.md b/docs/upgrade.md index b7cd103..2430ef4 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -7,52 +7,55 @@ Here we cover how to upgrade to Longhorn v0.3 from all previous releases. It's recommended to create a recent backup of every volume to the backupstore before upgrade. -Create an on-cluster backupstore if you haven't already. We'll use NFS in this -example. +If you don't have a on-cluster backupstore already, create one. Here we'll use NFS for example. +1. Execute following command to create the backupstore ``` kubectl apply -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/backupstores/nfs-backupstore.yaml ``` - -On Settings page, set Backup Target to +2. On Longhorn UI Settings page, set Backup Target to `nfs://longhorn-test-nfs-svc.default:/opt/backupstore` and click `Save`. -Navigate to each volume detail page and click `Take Snapshot`. Click the new -snapshot and click `Backup`. +Navigate to each volume detail page and click `Take Snapshot` (it's recommended to run `sync` in the host command line before `Take Snapshot`). Click the new +snapshot and click `Backup`. Wait for the new backup to show up in the volume's backup list before continuing. ## Check For Issues Make sure no volume is in degraded or faulted state. Wait for degraded -volumes to heal and delete/restore faulted volumes before proceeding. +volumes to heal and delete/salvage faulted volumes before proceeding. ## Detach Volumes Shutdown all Kubernetes Pods using Longhorn volumes in order to detach the -volumes. The easiest way to achieve this is by deleting all workloads. If +volumes. The easiest way to achieve this is by deleting all workloads and recreate them later after upgrade. If this is not desirable, some workloads may be suspended. We will cover how each workload can be modified to shut down its pods. +### Deployment +Edit the deployment with `kubectl edit deploy/`. +Set `.spec.replicas` to `0`. + +### StatefulSet +Edit the statefulset with `kubectl edit statefulset/`. +Set `.spec.replicas` to `0`. + +### DaemonSet +There is no way to suspend this workload. +Delete the daemonset with `kubectl delete ds/`. + +### Pod +Delete the pod with `kubectl delete pod/`. +There is no way to suspend a pod not managed by a workload controller. + ### CronJob Edit the cronjob with `kubectl edit cronjob/`. Set `.spec.suspend` to `true`. Wait for any currently executing jobs to complete, or terminate them by deleting relevant pods. -### DaemonSet -Delete the daemonset with `kubectl delete ds/`. -There is no way to suspend this workload. - -### Deployment -Edit the deployment with `kubectl edit deploy/`. -Set `.spec.replicas` to `0`. - ### Job Consider allowing the single-run job to complete. Otherwise, delete the job with `kubectl delete job/`. -### Pod -Delete the pod with `kubectl delete pod/`. -There is no way to suspend a pod not managed by a workload controller. - ### ReplicaSet Edit the replicaset with `kubectl edit replicaset/`. Set `.spec.replicas` to `0`. @@ -61,21 +64,19 @@ Set `.spec.replicas` to `0`. 
Edit the replicationcontroller with `kubectl edit rc/`. Set `.spec.replicas` to `0`. -### StatefulSet -Edit the statefulset with `kubectl edit statefulset/`. -Set `.spec.replicas` to `0`. +Wait for the volumes using by the Kubernetes to complete detaching. -Detach all remaining volumes from Longhorn UI. These volumes were most likely +Then detach all remaining volumes from Longhorn UI. These volumes were most likely created and attached outside of Kubernetes via Longhorn UI or REST API. -## Uninstall Old Version +## Uninstall the Old Version of Longhorn Make note of `BackupTarget` on the `Setting` page. You will need to manually set `BackupTarget` after upgrading from either v0.1 or v0.2. Delete Longhorn components. -For Longhorn `v0.1`: +For Longhorn `v0.1` (most likely installed using Longhorn App in Rancher 2.0): ``` kubectl delete -f https://raw.githubusercontent.com/llparse/longhorn/v0.1/deploy/uninstall-for-upgrade.yaml ``` @@ -87,19 +88,20 @@ kubectl delete -f https://raw.githubusercontent.com/rancher/longhorn/v0.2/deploy If both commands returned `Not found` for all components, Longhorn is probably deployed in a different namespace. Determine which namespace is in use and -adjust `NAMESPACE` accordingly: +adjust `NAMESPACE` here accordingly: ``` -NAMESPACE=longhorn-custom-ns +NAMESPACE= curl -sSfL https://raw.githubusercontent.com/rancher/longhorn/v0.1/deploy/uninstall-for-upgrade.yaml|sed "s#^\( *\)namespace: longhorn#\1namespace: ${NAMESPACE}#g" > longhorn.yaml kubectl delete -f longhorn.yaml ``` ## Backup Longhorn System -Backup Longhorn CRD yaml to local directory. +We're going to backup Longhorn CRD yaml to local directory, so we can restore or inspect them later. ### v0.1 -Check your backups to make sure Longhorn was running in namespace `longhorn`. +User must backup the CRDs for v0.1 because we will change the default deploying namespace for Longhorn. +Check your backups to make sure Longhorn was running in namespace `longhorn`, otherwise change the value of `NAMESPACE` below. ``` NAMESPACE=longhorn kubectl -n ${NAMESPACE} get volumes.longhorn.rancher.io -o yaml > longhorn-v0.1-backup-volumes.yaml @@ -110,7 +112,7 @@ kubectl -n ${NAMESPACE} get settings.longhorn.rancher.io -o yaml > longhorn-v0.1 ### v0.2 Check your backups to make sure Longhorn was running in namespace -`longhorn-system`. +`longhorn-system`, otherwise change the value of `NAMESPACE` below. ``` NAMESPACE=longhorn-system kubectl -n ${NAMESPACE} get volumes.longhorn.rancher.io -o yaml > longhorn-v0.2-backup-volumes.yaml @@ -122,7 +124,7 @@ kubectl -n ${NAMESPACE} get settings.longhorn.rancher.io -o yaml > longhorn-v0.2 ## Delete CRDs in Different Namespace This is only required for Rancher users running Longhorn App `v0.1`. Delete all -CRDs from your namespace which is probably `longhorn`. +CRDs from your namespace which is `longhorn` by default. ``` NAMESPACE=longhorn kubectl -n ${NAMESPACE} get volumes.longhorn.rancher.io -o yaml | sed "s/\- longhorn.rancher.io//g" | kubectl apply -f - @@ -137,19 +139,13 @@ kubectl -n ${NAMESPACE} delete settings.longhorn.rancher.io --all ## Install Longhorn v0.3 -### Rancher 2.x -For Rancher users who are running Longhorn v0.1, delete the Longhorn App from -`Catalog Apps` screen in Rancher UI. *Do not click the upgrade button.* Launch +### Installed with Longhorn App v0.1 in Rancher 2.x +For Rancher users who are running Longhorn v0.1, *Do not click the upgrade button.* + +1. Delete the Longhorn App from `Catalog Apps` screen in Rancher UI. 
Launch Longhorn App template version `0.3.0-rc4`. - -### Other Kubernetes Distro - -For Longhorn v0.2 users who are not using Rancher, follow -[the official Longhorn Deployment instructions](../README.md#deployment). - -## Restore Longhorn System - -This step is only required for Rancher users running Longhorn App `v0.1`. +2. Restore Longhorn System. This step is only required for Rancher users running Longhorn App `v0.1`. +Don't change the NAMESPACE variable below. Longhorn system will be installed in the `longhorn-system` namespace. ``` NAMESPACE=longhorn-system @@ -159,9 +155,15 @@ sed "s#^\( *\)namespace: .*#\1namespace: ${NAMESPACE}#g" longhorn-v0.1-backup-en sed "s#^\( *\)namespace: .*#\1namespace: ${NAMESPACE}#g" longhorn-v0.1-backup-volumes.yaml | kubectl apply -f - ``` +### Installed without using Longhorn App v0.1 + +For Longhorn v0.2 users who are not using Rancher, follow +[the official Longhorn Deployment instructions](../README.md#deployment). + + ## Access UI and Set BackupTarget -Wait until the longhorn-ui pod is `Running`: +Wait until the longhorn-ui and longhorn-manager pods are `Running`: ``` kubectl -n longhorn-system get pod -w ``` @@ -189,7 +191,7 @@ manually. ## Note -Upgrade is always tricky. Keeping recent backups for volumes is critical. +Upgrade is always tricky. Keeping recent backups for volumes is critical. If anything goes wrong, you can restore the volume using the backup. If you have any issues, please report it at https://github.com/rancher/longhorn/issues and include your backup yaml files From 9098172019b50d633b90aefdd9e66b1b4aa45ef4 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 21 Aug 2018 20:10:20 -0700 Subject: [PATCH 27/44] Sync with Longhorn Manager commit 7344e505df6d3b779d46c4fb153c3169316fe753 Author: James Oliver Date: Tue Aug 21 18:51:13 2018 -0700 Detect flexvolume path by inspecting host pid namespace --- scripts/environment_check.sh | 85 +++++++++++++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 5 deletions(-) diff --git a/scripts/environment_check.sh b/scripts/environment_check.sh index 6ba4934..84eb41a 100755 --- a/scripts/environment_check.sh +++ b/scripts/environment_check.sh @@ -39,23 +39,95 @@ spec: args: ["/bin/sh", "-c", "sleep 1000000000"] volumeMounts: - name: mountpoint - mountPath: /mnt/tmp + mountPath: /tmp/longhorn-environment-check mountPropagation: Bidirectional securityContext: privileged: true volumes: - name: mountpoint hostPath: - path: /mnt/tmp + path: /tmp/longhorn-environment-check EOF kubectl create -f $TEMP_DIR/environment_check.yaml } +create_pod() { +cat < $TEMP_DIR/detect-flexvol-dir.yaml +apiVersion: v1 +kind: Pod +metadata: + name: detect-flexvol-dir +spec: + containers: + - name: detect-flexvol-dir + image: busybox + command: ["/bin/sh"] + args: + - -c + - | + find_kubelet_proc() { + for proc in \`find /proc -type d -maxdepth 1\`; do + if [ ! 
-f \$proc/cmdline ]; then + continue + fi + if [[ "\$(cat \$proc/cmdline | tr '\000' '\n' | head -n1 | tr '/' '\n' | tail -n1)" == "kubelet" ]]; then + echo \$proc + return + fi + done + } + get_flexvolume_path() { + proc=\$(find_kubelet_proc) + if [ "\$proc" != "" ]; then + path=\$(cat \$proc/cmdline | tr '\000' '\n' | grep volume-plugin-dir | tr '=' '\n' | tail -n1) + if [ "\$path" == "" ]; then + echo '/usr/libexec/kubernetes/kubelet-plugins/volume/exec/' + else + echo \$path + fi + return + fi + echo 'no kubelet process found, dunno' + } + get_flexvolume_path + securityContext: + privileged: true + hostPID: true + restartPolicy: Never +EOF + kubectl create -f $TEMP_DIR/detect-flexvol-dir.yaml +} + cleanup() { - kubectl delete -f $TEMP_DIR/environment_check.yaml + kubectl delete -f $TEMP_DIR/environment_check.yaml & + a=$! + kubectl delete -f $TEMP_DIR/detect-flexvol-dir.yaml & + b=$! + wait $a + wait $b rm -rf $TEMP_DIR } +wait_pod_ready() { + while true; do + local pod=$(kubectl get po/detect-flexvol-dir -o json) + local phase=$(echo $pod | jq -r .status.phase) + + if [ "$phase" == "Succeeded" ]; then + echo "pod/detect-flexvol-dir completed" + return + fi + + echo "waiting for pod/detect-flexvol-dir to finish" + sleep 3 + done +} + +validate_pod() { + flexvol_path=$(kubectl logs detect-flexvol-dir) + echo -e "\n FlexVolume Path: ${flexvol_path}\n" +} + wait_ds_ready() { while true; do local ds=$(kubectl get ds/longhorn-environment-check -o json) @@ -72,7 +144,7 @@ wait_ds_ready() { done } -validate_pods() { +validate_ds() { local allSupported=true local pods=$(kubectl -l app=longhorn-environment-check get po -o json) @@ -101,7 +173,10 @@ validate_pods() { dependencies kubectl jq mktemp TEMP_DIR=$(mktemp -d) trap cleanup EXIT +create_pod create_ds +wait_pod_ready wait_ds_ready -validate_pods +validate_pod +validate_ds exit 0 From 158c767e558721a4cd4d740af03aaa4a4d078e08 Mon Sep 17 00:00:00 2001 From: James Oliver Date: Tue, 21 Aug 2018 20:39:53 -0700 Subject: [PATCH 28/44] Refer to check environment script where applicable --- README.md | 51 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 10d7ab9..7989a73 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ Longhorn is 100% open source software. Project source code is spread across a nu ## Kubernetes Driver Requirements -Longhorn can be used in Kubernetes to provide persistent storage through either Longhorn Container Storage Interface (CSI) driver or Longhorn Flexvolume driver. Longhorn will automatically deploy one of the drivers, depends on user's Kubernetes cluster's setup. User can also specify the driver in the deployment yaml file. CSI is preferred. +Longhorn can be used in Kubernetes to provide persistent storage through either Longhorn Container Storage Interface (CSI) driver or Longhorn FlexVolume driver. Longhorn will automatically deploy one of the drivers, depending on the Kubernetes cluster configuration. User can also specify the driver in the deployment yaml file. CSI is preferred. ### Requirement for the CSI driver @@ -37,7 +37,7 @@ Longhorn can be used in Kubernetes to provide persistent storage through either 1. CSI is in beta release for this version of Kubernetes, and enabled by default. 2. Mount Propagation feature gate enabled. 1. It's enabled by default in Kubernetes v1.10. But some early versions of RKE may not enable it. -3. If above conditions cannot be met, Longhorn will falls back to use Flexvolume driver. +3. 
If above conditions cannot be met, Longhorn will fall back to the FlexVolume driver. ### Check if your setup satisfied CSI requirement 1. Use the following command to check your Kubernetes server version @@ -48,14 +48,25 @@ Server Version: version.Info{Major:"1", Minor:"10", GitVersion:"v1.10.1", GitCom ``` The `Server Version` should be `v1.10` or above. -2. Use the following command on the hosts to check if the feature gate is enabled for Mount Propagation +2. Use the following script to check if `MountPropagation` feature is enabled. ``` -# ps aux|grep kube|grep MountPropagation -root 1707 3.1 12.4 1087008 503848 ? Ssl Jul12 1288:35 kube-apiserver --storage-backend=etcd3 --client-ca-file=/etc/kubernetes/ssl/kube-ca.pem --tls-cert-file=/etc/kubernetes/ssl/kube-apiserver.pem --kubelet-client-certificate=/etc/kubernetes/ssl/kube-apiserver.pem --apiserver-count=1 --secure-port=6443 --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname --kubelet-client-key=/etc/kubernetes/ssl/kube-apiserver-key.pem --tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305 --allow-privileged=true --insecure-port=0 --admission-control=ServiceAccount,NamespaceLifecycle,LimitRanger,PersistentVolumeLabel,DefaultStorageClass,ResourceQuota,DefaultTolerationSeconds --cloud-provider= --service-cluster-ip-range=10.43.0.0/16 --tls-private-key-file=/etc/kubernetes/ssl/kube-apiserver-key.pem --service-account-key-file=/etc/kubernetes/ssl/kube-apiserver-key.pem --authorization-mode=Node,RBAC --bind-address=0.0.0.0 --feature-gates=MountPropagation=true --insecure-bind-address=127.0.0.1 --etcd-cafile=/etc/kubernetes/ssl/kube-ca.pem --etcd-certfile=/etc/kubernetes/ssl/kube-node.pem --etcd-keyfile=/etc/kubernetes/ssl/kube-node-key.pem --etcd-servers=https://138.197.199.191:2379 --etcd-prefix=/registry -root 1760 4.7 6.4 1508564 260724 ? 
Ssl Jul12 1970:59 kubelet --network-plugin=cni --resolv-conf=/etc/resolv.conf --cluster-domain=cluster.local --v=2 --enforce-node-allocatable= --cgroups-per-qos=True --cni-bin-dir=/opt/cni/bin --cluster-dns=10.43.0.10 --cloud-provider= --fail-swap-on=false --address=0.0.0.0 --cadvisor-port=0 --volume-plugin-dir=/var/lib/kubelet/volumeplugins --hostname-override=yasker-longhorn-dev-1 --client-ca-file=/etc/kubernetes/ssl/kube-ca.pem --root-dir=/var/lib/kubelet --tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305 --feature-gates=MountPropagation=true --cni-conf-dir=/etc/cni/net.d --allow-privileged=true --pod-infra-container-image=rancher/pause-amd64:3.0 --kubeconfig=/etc/kubernetes/ssl/kubecfg-kube-node.yaml --read-only-port=0 --anonymous-auth=false --cgroup-driver=cgroupfs -``` -Both `kube-apiserver` and `kubelet` should have `--feature-gates=MountPropagation=true` +# curl -sSfL https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/scripts/environment_check.sh | bash +pod/detect-flexvol-dir created +daemonset.apps/longhorn-environment-check created +waiting for pod/detect-flexvol-dir to finish +pod/detect-flexvol-dir completed +waiting for pods to become ready (1/7) +waiting for pods to become ready (6/7) +all pods ready (7/7) + FlexVolume Path: /var/lib/kubelet/volumeplugins + + + MountPropagation is enabled! + +pod "detect-flexvol-dir" deleted +daemonset.apps "longhorn-environment-check" deleted +``` ### Requirement for the Flexvolume driver @@ -64,7 +75,29 @@ Both `kube-apiserver` and `kubelet` should have `--feature-gates=MountPropagatio 3. User need to know the volume plugin directory in order to setup the driver correctly. 1. Rancher RKE: `/var/lib/kubelet/volumeplugins` 2. Google GKE: `/home/kubernetes/flexvolume` - 3. For other distro, please find the correct directory by running `ps aux|grep kubelet` on the host and check the `--volume-plugin-dir` parameter. If there is none, it would be the default value `/usr/libexec/kubernetes/kubelet-plugins/volume/exec/` . + 3. For any other distro, please run the directory detection script in the next section. + +### Detect Volume Plugin Directory + +Use the following script to detect your volume plugin directory. +``` +# curl -sSfL https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/scripts/environment_check.sh | bash +pod/detect-flexvol-dir created +daemonset.apps/longhorn-environment-check created +waiting for pod/detect-flexvol-dir to finish +pod/detect-flexvol-dir completed +waiting for pods to become ready (1/7) +waiting for pods to become ready (6/7) +all pods ready (7/7) + + FlexVolume Path: /var/lib/kubelet/volumeplugins + + + MountPropagation is enabled! 
+ +pod "detect-flexvol-dir" deleted +daemonset.apps "longhorn-environment-check" deleted +``` # Upgrading From 87c733c101b57d7f67fe6394a1e8d4bddcf2cc35 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 21 Aug 2018 23:19:43 -0700 Subject: [PATCH 29/44] Sync with manager: commit 3b86e81b2157a4457daaef10871bfae78356ae18 Author: Sheng Yang Date: Tue Aug 21 23:09:11 2018 -0700 Update image to rancher/longhorn-manager:v0.3-rc5 Engine image: rancher/longhorn-engine:v0.3-rc5 UI Image: rancher/longhorn-ui:v0.3-rc5 --- deploy/longhorn.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/deploy/longhorn.yaml b/deploy/longhorn.yaml index 58f6ce7..6e90798 100644 --- a/deploy/longhorn.yaml +++ b/deploy/longhorn.yaml @@ -178,7 +178,7 @@ spec: spec: containers: - name: longhorn-manager - image: rancher/longhorn-manager:2c17d9e + image: rancher/longhorn-manager:v0.3-rc5 imagePullPolicy: Always securityContext: privileged: true @@ -187,9 +187,9 @@ spec: - -d - daemon - --engine-image - - rancher/longhorn-engine:v0.3-rc4 + - rancher/longhorn-engine:v0.3-rc5 - --manager-image - - rancher/longhorn-manager:2c17d9e + - rancher/longhorn-manager:v0.3-rc5 - --service-account - longhorn-service-account ports: @@ -266,7 +266,7 @@ spec: spec: containers: - name: longhorn-ui - image: rancher/longhorn-ui:72303d9 + image: rancher/longhorn-ui:v0.3-rc5 ports: - containerPort: 8000 env: @@ -305,18 +305,18 @@ spec: spec: initContainers: - name: wait-longhorn-manager - image: rancher/longhorn-manager:2c17d9e + image: rancher/longhorn-manager:v0.3-rc5 command: ['sh', '-c', 'while [ $(curl -m 1 -s -o /dev/null -w "%{http_code}" http://longhorn-backend:9500/v1) != "200" ]; do echo waiting; sleep 2; done'] containers: - name: longhorn-driver-deployer - image: rancher/longhorn-manager:2c17d9e + image: rancher/longhorn-manager:v0.3-rc5 imagePullPolicy: Always command: - longhorn-manager - -d - deploy-driver - --manager-image - - rancher/longhorn-manager:2c17d9e + - rancher/longhorn-manager:v0.3-rc5 - --manager-url - http://longhorn-backend:9500/v1 # manually choose "flexvolume" or "csi" From fba10abf601cd4a5b68c4271d5bb841515275a77 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 21 Aug 2018 23:29:49 -0700 Subject: [PATCH 30/44] Update upgrade.md There is no `0.3.0-rc4`. Just `0.3-rc4`. --- docs/upgrade.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/upgrade.md b/docs/upgrade.md index 2430ef4..d972450 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -143,7 +143,7 @@ kubectl -n ${NAMESPACE} delete settings.longhorn.rancher.io --all For Rancher users who are running Longhorn v0.1, *Do not click the upgrade button.* 1. Delete the Longhorn App from `Catalog Apps` screen in Rancher UI. Launch -Longhorn App template version `0.3.0-rc4`. +Longhorn App template version `0.3-rc4`. 2. Restore Longhorn System. This step is only required for Rancher users running Longhorn App `v0.1`. Don't change the NAMESPACE variable below. Longhorn system will be installed in the `longhorn-system` namespace. From e3f6c79b8e3188ad75575f65640278a88c302a29 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 22 Aug 2018 12:36:59 -0700 Subject: [PATCH 31/44] Update README.md --- README.md | 113 +++++++++++++++++++++++++++--------------------------- 1 file changed, 56 insertions(+), 57 deletions(-) diff --git a/README.md b/README.md index 7989a73..497ef6d 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,34 @@ Longhorn is 100% open source software. 
Project source code is spread across a nu Longhorn can be used in Kubernetes to provide persistent storage through either Longhorn Container Storage Interface (CSI) driver or Longhorn FlexVolume driver. Longhorn will automatically deploy one of the drivers, depending on the Kubernetes cluster configuration. User can also specify the driver in the deployment yaml file. CSI is preferred. +### Environment check script + +We've wrote a script to help user to get enough information to configure the setup correctly. + +Before installing, run: +``` +curl -sSfL https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/scripts/environment_check.sh | bash +``` +Example result: +``` +pod/detect-flexvol-dir created +daemonset.apps/longhorn-environment-check created +waiting for pod/detect-flexvol-dir to finish +pod/detect-flexvol-dir completed +waiting for pods to become ready (1/7) +waiting for pods to become ready (6/7) +all pods ready (7/7) + + FlexVolume Path: /var/lib/kubelet/volumeplugins + + + MountPropagation is enabled! + +pod "detect-flexvol-dir" deleted +daemonset.apps "longhorn-environment-check" deleted +``` +Please make a note of `Flexvolume Path` and `MountPropagation` state above. + ### Requirement for the CSI driver 1. Kubernetes v1.10+ @@ -42,78 +70,45 @@ Longhorn can be used in Kubernetes to provide persistent storage through either ### Check if your setup satisfied CSI requirement 1. Use the following command to check your Kubernetes server version ``` -# kubectl version +kubectl version +``` +Result: +``` Client Version: version.Info{Major:"1", Minor:"10", GitVersion:"v1.10.3", GitCommit:"2bba0127d85d5a46ab4b778548be28623b32d0b0", GitTreeState:"clean", BuildDate:"2018-05-21T09:17:39Z", GoVersion:"go1.9.3", Compiler:"gc", Platform:"linux/amd64"} Server Version: version.Info{Major:"1", Minor:"10", GitVersion:"v1.10.1", GitCommit:"d4ab47518836c750f9949b9e0d387f20fb92260b", GitTreeState:"clean", BuildDate:"2018-04-12T14:14:26Z", GoVersion:"go1.9.3", Compiler:"gc", Platform:"linux/amd64"} ``` The `Server Version` should be `v1.10` or above. -2. Use the following script to check if `MountPropagation` feature is enabled. -``` -# curl -sSfL https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/scripts/environment_check.sh | bash -pod/detect-flexvol-dir created -daemonset.apps/longhorn-environment-check created -waiting for pod/detect-flexvol-dir to finish -pod/detect-flexvol-dir completed -waiting for pods to become ready (1/7) -waiting for pods to become ready (6/7) -all pods ready (7/7) - - FlexVolume Path: /var/lib/kubelet/volumeplugins - - - MountPropagation is enabled! - -pod "detect-flexvol-dir" deleted -daemonset.apps "longhorn-environment-check" deleted -``` +2. The result of environment check script should contain `MountPropagation is enabled!`. ### Requirement for the Flexvolume driver 1. Kubernetes v1.8+ 2. Make sure `curl`, `findmnt`, `grep`, `awk` and `blkid` has been installed in the every node of the Kubernetes cluster. 3. User need to know the volume plugin directory in order to setup the driver correctly. - 1. Rancher RKE: `/var/lib/kubelet/volumeplugins` - 2. Google GKE: `/home/kubernetes/flexvolume` - 3. For any other distro, please run the directory detection script in the next section. - -### Detect Volume Plugin Directory - -Use the following script to detect your volume plugin directory. 
-``` -# curl -sSfL https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/scripts/environment_check.sh | bash -pod/detect-flexvol-dir created -daemonset.apps/longhorn-environment-check created -waiting for pod/detect-flexvol-dir to finish -pod/detect-flexvol-dir completed -waiting for pods to become ready (1/7) -waiting for pods to become ready (6/7) -all pods ready (7/7) - - FlexVolume Path: /var/lib/kubelet/volumeplugins - - - MountPropagation is enabled! - -pod "detect-flexvol-dir" deleted -daemonset.apps "longhorn-environment-check" deleted -``` + 1. The correct directory should be reported by the environment check script. + 2. Rancher RKE: `/var/lib/kubelet/volumeplugins` + 3. Google GKE: `/home/kubernetes/flexvolume` + 4. For any other distro, use the value reported by the environment check script. # Upgrading -For instructions on how to upgrade Longhorn v0.1 or v0.2 to v0.3, [see this document](docs/upgrade.md#upgrade). +For instructions on how to upgrade Longhorn App v0.1 or v0.2 to v0.3, [see this document](docs/upgrade.md#upgrade). # Deployment -Create the deployment of Longhorn in your Kubernetes cluster is easy. +Create the deployment of Longhorn in your Kubernetes cluster is straightforward. -If you're using Rancher RKE, or other distro with Kubernetes v1.10+ and Mount Propagation enabled, you can just do: +If CSI is supported (as stated above) you can just do: ``` kubectl apply -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml ``` -If you're using Flexvolume driver with other Kubernetes Distro, replace the value of $FLEXVOLUME_DIR in the following command with your own Flexvolume Directory as specified above. +If you're using Flexvolume driver with Kubernetes Distro other than RKE, replace the value of $FLEXVOLUME_DIR in the following command with your own Flexvolume Directory as specified above. +``` +FLEXVOLUME_DIR= +``` +Then run ``` -FLEXVOLUME_DIR="/home/kubernetes/flexvolume/" curl -s https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml|sed "s#^\( *\)value: \"/var/lib/kubelet/volumeplugins\"#\1value: \"${FLEXVOLUME_DIR}\"#g" > longhorn.yaml kubectl apply -f longhorn.yaml ``` @@ -173,7 +168,7 @@ If the Kubernetes Cluster supports creating LoadBalancer, user can then use `EXT Longhorn UI would connect to the Longhorn Manager API, provides the overview of the system, the volume operations, and the snapshot/backup operations. It's highly recommended for the user to check out Longhorn UI. -Notice the current UI is unauthenticated. +Noted that the current UI is unauthenticated. # Use the Longhorn with Kubernetes @@ -230,11 +225,12 @@ spec: ``` More examples are available at `./examples/` -# Feature Usage +# Highlight features ### Snapshot A snapshot in Longhorn represents a volume state at a given time, stored in the same location of volume data on physical disk of the host. Snapshot creation is instant in Longhorn. User can revert to any previous taken snapshot using the UI. Since Longhorn is a distributed block storage, please make sure the Longhorn volume is umounted from the host when revert to any previous snapshot, otherwise it will confuse the node filesystem and cause corruption. + ### Backup A backup in Longhorn represents a volume state at a given time, stored in the BackupStore which is outside of the Longhorn System. Backup creation will involving copying the data through the network, so it will take time. @@ -284,21 +280,24 @@ Longhorn supports recurring snapshot and backup for volumes. 
User only need to s User can find the setting for the recurring snapshot and backup in the `Volume Detail` page. -## Other topics +## Other Features -### [Upgrade from v0.2](./docs/upgrade.md) ### [Multiple disks support](./docs/multidisk.md) ### [iSCSI support](./docs/iscsi.md) -### [Google Kubernetes Engine](./docs/gke.md) -### [Troubleshotting](./docs/troubleshooting.md) ### [Restoring Stateful Set volumes](./docs/restore_statefulset.md) ### [Base Image support](./docs/base-image.md) +## Additional instructions for deployment +### [Google Kubernetes Engine](./docs/gke.md) +### [Upgrade from v0.2](./docs/upgrade.md) +### [Troubleshotting](./docs/troubleshooting.md) + ## Uninstall Longhorn -Longhorn CRD has finalizers in them, so user should delete the volumes and related resource first, give manager a chance to clean up after them. +Longhorn CRD has the finalizers in them, so user should delete the volumes and related resource first, give manager a chance to clean up after them. ### 1. Clean up volume and related resources +Noted that you would lose all you data after done this. It's recommended to make backups before proceeding if you intent to keep the data. ``` kubectl -n longhorn-system delete volumes.longhorn.rancher.io --all From 7d4d7eb947cf54377f5150badb9ba1c5210ddf8e Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 22 Aug 2018 12:38:55 -0700 Subject: [PATCH 32/44] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 497ef6d..bf8d6c1 100644 --- a/README.md +++ b/README.md @@ -289,7 +289,7 @@ User can find the setting for the recurring snapshot and backup in the `Volume D ## Additional instructions for deployment ### [Google Kubernetes Engine](./docs/gke.md) -### [Upgrade from v0.2](./docs/upgrade.md) +### [Upgrade from v0.1/v0.2](./docs/upgrade.md) ### [Troubleshotting](./docs/troubleshooting.md) ## Uninstall Longhorn From 54ebaaa9247a13ebd54e1384d8a5a957f8c24764 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 22 Aug 2018 14:00:31 -0700 Subject: [PATCH 33/44] Update troubleshooting.md --- docs/troubleshooting.md | 45 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 7c26a16..4d5b579 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -1,6 +1,7 @@ -## Troubleshooting +Troubleshooting -### Volume can be attached/detached from UI, but Kubernetes Pod/StatefulSet etc cannot use it +# Common issues +## Volume can be attached/detached from UI, but Kubernetes Pod/StatefulSet etc cannot use it Check if volume plugin directory has been set correctly. @@ -10,3 +11,43 @@ But some vendors may choose to change the directory due to various reasons. For User can find the correct directory by running `ps aux|grep kubelet` on the host and check the `--volume-plugin-dir`parameter. If there is none, the default `/usr/libexec/kubernetes/kubelet-plugins/volume/exec/` will be used. +# Troubleshooting guide + +There are a few compontents in the Longhorn. Manager, Engine, Driver and UI. All of those components runnings as pods in the `longhorn-system` namespace by default inside the Kubernetes cluster. + +## UI +Make use of the Longhorn UI is a good start for the troubleshooting. For example, if Kubernetes cannot mount one volume correctly, after stop the workload, try to attach and mount that volume manually on one node and access the content to check if volume is intact. 
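As a sketch of such a manual check (assuming a volume named `testvol` that has already been attached to the current node from the UI; the `/dev/longhorn/` device path is an assumption about where Longhorn exposes attached volumes on the host):

```
# After attaching from the UI, the volume should appear as a block device on the node
ls -l /dev/longhorn/

# Mount it read-only at a temporary location and inspect the content
mkdir -p /mnt/testvol
mount -o ro /dev/longhorn/testvol /mnt/testvol
ls -l /mnt/testvol

# Unmount again before detaching the volume from the UI
umount /mnt/testvol
```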
+ +Also, the event logs in the UI dashboard provides some information of probably issues. Check for the event logs in `Warning` level. + +## Manager and engines +You can get the log from Longhorn Manager and Engines to help with the troubleshooting. The most useful logs are from `longhorn-manager-xxx`, and the log inside Longhorn Engine, e.g. `-e-xxxx` and `-r-xxxx`. + +Since normally there are multiple Longhorn Manager running at the same time, we recommend using [kubetail](https://github.com/johanhaleby/kubetail) which is a great tool to keep track of the logs of multiple pods. You can use: +``` +kubetail longhorn-system -n longhorn-system +``` +To track the manager logs in real time. + +## CSI driver + +For CSI driver, check the logs for `csi-attacher-0` and `csi-provisioner-0`, as well as containers in `longhorn-csi-plugin-xxx`. + +## Flexvolume driver + +For Flexvolume driver, you need to check the kubelet logs as the first step. Flexvolume driver itself doesn't run inside the container. It's the kubelet process who is responsible for calling the driver. + +If kubelet is running natively on the node, you can use the following command to get the log: +``` +journalctl -u kubelet +``` + +Or if kubelet is running as a container (e.g. in RKE), use the following command instead: +``` +docker logs kubelet +``` + +For even more detail logs of Longhorn Flexvolume, run following command on the node or inside the container (if kubelet is running as a container, e.g. in RKE): +``` +touch /var/log/longhorn_driver.log +``` From 0e5b939e9538cdc3aad0e70c5f764891a4956e6c Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 22 Aug 2018 14:01:01 -0700 Subject: [PATCH 34/44] Update troubleshooting.md --- docs/troubleshooting.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 4d5b579..1253716 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -1,7 +1,7 @@ -Troubleshooting +# Troubleshooting -# Common issues -## Volume can be attached/detached from UI, but Kubernetes Pod/StatefulSet etc cannot use it +## Common issues +### Volume can be attached/detached from UI, but Kubernetes Pod/StatefulSet etc cannot use it Check if volume plugin directory has been set correctly. @@ -11,16 +11,16 @@ But some vendors may choose to change the directory due to various reasons. For User can find the correct directory by running `ps aux|grep kubelet` on the host and check the `--volume-plugin-dir`parameter. If there is none, the default `/usr/libexec/kubernetes/kubelet-plugins/volume/exec/` will be used. -# Troubleshooting guide +## Troubleshooting guide There are a few compontents in the Longhorn. Manager, Engine, Driver and UI. All of those components runnings as pods in the `longhorn-system` namespace by default inside the Kubernetes cluster. -## UI +### UI Make use of the Longhorn UI is a good start for the troubleshooting. For example, if Kubernetes cannot mount one volume correctly, after stop the workload, try to attach and mount that volume manually on one node and access the content to check if volume is intact. Also, the event logs in the UI dashboard provides some information of probably issues. Check for the event logs in `Warning` level. -## Manager and engines +### Manager and engines You can get the log from Longhorn Manager and Engines to help with the troubleshooting. The most useful logs are from `longhorn-manager-xxx`, and the log inside Longhorn Engine, e.g. `-e-xxxx` and `-r-xxxx`. 
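A quick way to pull those logs with plain `kubectl` is sketched below (the pod name is a placeholder, and the `app=longhorn-manager` label is an assumption based on the default deployment yaml):

```
# Logs from a single manager pod (pick a real name from `kubectl -n longhorn-system get pod`)
kubectl -n longhorn-system logs longhorn-manager-xxx

# Logs from all manager pods at once, selected by label (assumed label)
kubectl -n longhorn-system logs -l app=longhorn-manager --tail=200

# Logs of a volume's engine pod, following the -e-xxxx naming mentioned above
kubectl -n longhorn-system logs VOLUME_NAME-e-xxxx
```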
Since normally there are multiple Longhorn Manager running at the same time, we recommend using [kubetail](https://github.com/johanhaleby/kubetail) which is a great tool to keep track of the logs of multiple pods. You can use: @@ -29,11 +29,11 @@ kubetail longhorn-system -n longhorn-system ``` To track the manager logs in real time. -## CSI driver +### CSI driver For CSI driver, check the logs for `csi-attacher-0` and `csi-provisioner-0`, as well as containers in `longhorn-csi-plugin-xxx`. -## Flexvolume driver +### Flexvolume driver For Flexvolume driver, you need to check the kubelet logs as the first step. Flexvolume driver itself doesn't run inside the container. It's the kubelet process who is responsible for calling the driver. From b8c4ea680d4dfa0f57ec38348574feac1a517138 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 22 Aug 2018 14:08:01 -0700 Subject: [PATCH 35/44] Update troubleshooting.md --- docs/troubleshooting.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 1253716..cee5519 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -11,6 +11,8 @@ But some vendors may choose to change the directory due to various reasons. For User can find the correct directory by running `ps aux|grep kubelet` on the host and check the `--volume-plugin-dir`parameter. If there is none, the default `/usr/libexec/kubernetes/kubelet-plugins/volume/exec/` will be used. +User can also use the [environment check script](../README.md#environment-check-script) for this purpose. + ## Troubleshooting guide There are a few compontents in the Longhorn. Manager, Engine, Driver and UI. All of those components runnings as pods in the `longhorn-system` namespace by default inside the Kubernetes cluster. @@ -35,7 +37,9 @@ For CSI driver, check the logs for `csi-attacher-0` and `csi-provisioner-0`, as ### Flexvolume driver -For Flexvolume driver, you need to check the kubelet logs as the first step. Flexvolume driver itself doesn't run inside the container. It's the kubelet process who is responsible for calling the driver. +For Flexvolume driver, first check where the driver has been installed on the node. Check the log of `longhorn-driver-deployer-xxxx` for that information. + +Then check the kubelet logs. Flexvolume driver itself doesn't run inside the container. It would run along with the kubelet process. If kubelet is running natively on the node, you can use the following command to get the log: ``` From d8b911cb6aa019e7af0135591ca0b72c7023a03b Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 22 Aug 2018 14:35:38 -0700 Subject: [PATCH 36/44] Update README.md Fixes https://github.com/rancher/longhorn/issues/233 --- README.md | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index bf8d6c1..d7344b0 100644 --- a/README.md +++ b/README.md @@ -172,7 +172,7 @@ Noted that the current UI is unauthenticated. # Use the Longhorn with Kubernetes -Longhorn provides persistent volume directly to Kubernetes through one of the Longhorn drivers. No matter which driver you're using, you can use Kubernetes StorageClass to provision your persistent volumes. +Longhorn provides the persistent volume directly to Kubernetes through one of the Longhorn drivers. No matter which driver you're using, you can use Kubernetes StorageClass to provision your persistent volumes. Use following command to create a default Longhorn StorageClass named `longhorn`. 
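For reference, here is a minimal sketch of what such a StorageClass can look like, reusing the provisioner name and parameters that appear in the base image example earlier in this series (the canonical definition lives in `examples/storageclass.yaml` and may set different defaults):

```
cat <<EOF | kubectl apply -f -
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: longhorn
provisioner: rancher.io/longhorn
parameters:
  numberOfReplicas: "3"
  staleReplicaTimeout: "30"
  fromBackup: ""
EOF
```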
@@ -231,6 +231,14 @@ A snapshot in Longhorn represents a volume state at a given time, stored in the User can revert to any previous taken snapshot using the UI. Since Longhorn is a distributed block storage, please make sure the Longhorn volume is umounted from the host when revert to any previous snapshot, otherwise it will confuse the node filesystem and cause corruption. +#### Note about the block level snapshot + +Longhorn is a `crash-consistent` block storage solution. + +It's normal for the OS to keep content in the cache before writing into the block layer. However, it also means if the all the replicas are down, then the Longhorn may not contains the immediate change before the shutdown, since the content was kept in the OS level cache and hadn't transfered to Longhorn system yet. It's similar to if your desktop was down due to a power outage, after resuming the power, you may find some weird files in the hard drive. + +In order to force the data being written to the block layer at any given moment, the user can run `sync` command on the node manually, or umount the disk. OS would write the content from the cache to the block layer in either situation. + ### Backup A backup in Longhorn represents a volume state at a given time, stored in the BackupStore which is outside of the Longhorn System. Backup creation will involving copying the data through the network, so it will take time. @@ -287,14 +295,14 @@ User can find the setting for the recurring snapshot and backup in the `Volume D ### [Restoring Stateful Set volumes](./docs/restore_statefulset.md) ### [Base Image support](./docs/base-image.md) -## Additional instructions for deployment +## Additional informations ### [Google Kubernetes Engine](./docs/gke.md) ### [Upgrade from v0.1/v0.2](./docs/upgrade.md) ### [Troubleshotting](./docs/troubleshooting.md) ## Uninstall Longhorn -Longhorn CRD has the finalizers in them, so user should delete the volumes and related resource first, give manager a chance to clean up after them. +Longhorn CRD has the finalizers in them, so user should delete the volumes and related resource first, give the managers a chance to clean up after them. ### 1. Clean up volume and related resources Noted that you would lose all you data after done this. It's recommended to make backups before proceeding if you intent to keep the data. From b663e6cc629e7e6499974405e90f20a384aaef19 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 22 Aug 2018 15:48:17 -0700 Subject: [PATCH 37/44] Update upgrade.md --- docs/upgrade.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/upgrade.md b/docs/upgrade.md index d972450..c605f33 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -140,12 +140,12 @@ kubectl -n ${NAMESPACE} delete settings.longhorn.rancher.io --all ## Install Longhorn v0.3 ### Installed with Longhorn App v0.1 in Rancher 2.x -For Rancher users who are running Longhorn v0.1, *Do not click the upgrade button.* +For Rancher users who are running Longhorn v0.1, **do not click the upgrade button in the Rancher App.** -1. Delete the Longhorn App from `Catalog Apps` screen in Rancher UI. Launch -Longhorn App template version `0.3-rc4`. -2. Restore Longhorn System. This step is only required for Rancher users running Longhorn App `v0.1`. -Don't change the NAMESPACE variable below. Longhorn system will be installed in the `longhorn-system` namespace. +1. Delete the Longhorn App from `Catalog Apps` screen in Rancher UI. +2. Launch Longhorn App template version `0.3.0`. +3. 
Restore Longhorn System data. This step is required for Rancher users running Longhorn App `v0.1`. +Don't change the NAMESPACE variable below, since the newly installed Longhorn system will be installed in the `longhorn-system` namespace. ``` NAMESPACE=longhorn-system From 112b6974229eb6fff1db76fca6c0f2d1b73fe44c Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 22 Aug 2018 17:28:06 -0700 Subject: [PATCH 38/44] Update README.md Correct word styling. --- README.md | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index d7344b0..02e353e 100644 --- a/README.md +++ b/README.md @@ -8,11 +8,11 @@ You can read more details of Longhorn and its design [here](http://rancher.com/m Longhorn is a work in progress. We appreciate your comments as we continue to work on it! -## Source Code +## Source code Longhorn is 100% open source software. Project source code is spread across a number of repos: -1. Longhorn Engine -- Core controller/replica logic https://github.com/rancher/longhorn-engine -1. Longhorn Manager -- Longhorn orchestration, includes Flexvolume driver for Kubernetes https://github.com/rancher/longhorn-manager +1. Longhorn engine -- Core controller/replica logic https://github.com/rancher/longhorn-engine +1. Longhorn manager -- Longhorn orchestration, includes Flexvolume driver for Kubernetes https://github.com/rancher/longhorn-manager 1. Longhorn UI -- Dashboard https://github.com/rancher/longhorn-ui # Demo @@ -27,7 +27,7 @@ Longhorn is 100% open source software. Project source code is spread across a nu 2. Kubernetes v1.8+ 3. Make sure open-iscsi has been installed in all nodes of the Kubernetes cluster. For GKE, recommended Ubuntu as guest OS image since it contains open-iscsi already. -## Kubernetes Driver Requirements +## Kubernetes driver Requirements Longhorn can be used in Kubernetes to provide persistent storage through either Longhorn Container Storage Interface (CSI) driver or Longhorn FlexVolume driver. Longhorn will automatically deploy one of the drivers, depending on the Kubernetes cluster configuration. User can also specify the driver in the deployment yaml file. CSI is preferred. @@ -63,7 +63,7 @@ Please make a note of `Flexvolume Path` and `MountPropagation` state above. 1. Kubernetes v1.10+ 1. CSI is in beta release for this version of Kubernetes, and enabled by default. -2. Mount Propagation feature gate enabled. +2. Mount propagation feature gate enabled. 1. It's enabled by default in Kubernetes v1.10. But some early versions of RKE may not enable it. 3. If above conditions cannot be met, Longhorn will fall back to the FlexVolume driver. @@ -114,7 +114,7 @@ kubectl apply -f longhorn.yaml ``` For Google Kubernetes Engine (GKE) users, see [here](#google-kubernetes-engine) before proceed. -Longhorn Manager and Longhorn Driver will be deployed as daemonsets in a separate namespace called `longhorn-system`, as you can see in the yaml file. +Longhorn manager and Longhorn driver will be deployed as daemonsets in a separate namespace called `longhorn-system`, as you can see in the yaml file. When you see those pods has started correctly as follows, you've deployed the Longhorn successfully. @@ -166,11 +166,11 @@ longhorn-frontend LoadBalancer 10.20.245.110 100.200.200.123 80:30697/TC If the Kubernetes Cluster supports creating LoadBalancer, user can then use `EXTERNAL-IP`(`100.200.200.123` in the case above) of `longhorn-frontend` to access the Longhorn UI. 
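For instance, one way to read that address straight from the service object once the load balancer has been provisioned:
```
# prints only the external IP of the Longhorn frontend service
kubectl -n longhorn-system get svc longhorn-frontend \
  -o jsonpath='{.status.loadBalancer.ingress[0].ip}'
```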
Otherwise the user can use `:` (port is `30697`in the case above) to access the UI. -Longhorn UI would connect to the Longhorn Manager API, provides the overview of the system, the volume operations, and the snapshot/backup operations. It's highly recommended for the user to check out Longhorn UI. +Longhorn UI would connect to the Longhorn manager API, provides the overview of the system, the volume operations, and the snapshot/backup operations. It's highly recommended for the user to check out Longhorn UI. Noted that the current UI is unauthenticated. -# Use the Longhorn with Kubernetes +# Use Longhorn with Kubernetes Longhorn provides the persistent volume directly to Kubernetes through one of the Longhorn drivers. No matter which driver you're using, you can use Kubernetes StorageClass to provision your persistent volumes. @@ -229,7 +229,7 @@ More examples are available at `./examples/` ### Snapshot A snapshot in Longhorn represents a volume state at a given time, stored in the same location of volume data on physical disk of the host. Snapshot creation is instant in Longhorn. -User can revert to any previous taken snapshot using the UI. Since Longhorn is a distributed block storage, please make sure the Longhorn volume is umounted from the host when revert to any previous snapshot, otherwise it will confuse the node filesystem and cause corruption. +User can revert to any previous taken snapshot using the UI. Since Longhorn is a distributed block storage, please make sure the Longhorn volume is umounted from the host when revert to any previous snapshot, otherwise it will confuse the node filesystem and cause filesystem corruption. #### Note about the block level snapshot @@ -237,18 +237,18 @@ Longhorn is a `crash-consistent` block storage solution. It's normal for the OS to keep content in the cache before writing into the block layer. However, it also means if the all the replicas are down, then the Longhorn may not contains the immediate change before the shutdown, since the content was kept in the OS level cache and hadn't transfered to Longhorn system yet. It's similar to if your desktop was down due to a power outage, after resuming the power, you may find some weird files in the hard drive. -In order to force the data being written to the block layer at any given moment, the user can run `sync` command on the node manually, or umount the disk. OS would write the content from the cache to the block layer in either situation. +To force the data being written to the block layer at any given moment, the user can run `sync` command on the node manually, or umount the disk. OS would write the content from the cache to the block layer in either situation. ### Backup -A backup in Longhorn represents a volume state at a given time, stored in the BackupStore which is outside of the Longhorn System. Backup creation will involving copying the data through the network, so it will take time. +A backup in Longhorn represents a volume state at a given time, stored in the secondary storage (backupstore in Longhorn word) which is outside of the Longhorn system. Backup creation will involving copying the data through the network, so it will take time. A corresponding snapshot is needed for creating a backup. And user can choose to backup any snapshot previous created. -A BackupStore is a NFS server or S3 compatible server. +A backupstore is a NFS server or S3 compatible server. -A BackupTarget represents a BackupStore in the Longhorn System. 
The BackupTarget can be set at `Settings/General/BackupTarget` +A backup target represents a backupstore in the Longhorn. The backup target can be set at `Settings/General/BackupTarget` -If user is using a S3 compatible server as the BackupTarget, the BackupTargetSecret is needed for authentication informations. User need to manually create it as a Kubernetes Secret in the `longhorn-system` namespace. See below for details. +If user is using a S3 compatible server as the backup target, a backup target secret is needed for authentication informations. User need to manually create it as a Kubernetes Secret in the `longhorn-system` namespace. See below for details. #### Setup a testing backupstore We provides two testing purpose backupstore based on NFS server and Minio S3 server for testing, in `./deploy/backupstores`. @@ -283,17 +283,17 @@ data: ``` Notice the secret must be created in the `longhorn-system` namespace for Longhorn to access. -### Recurring Snapshot and Backup +### Recurring snapshot and backup Longhorn supports recurring snapshot and backup for volumes. User only need to set when he/she wish to take the snapshot and/or backup, and how many snapshots/backups needs to be retains, then Longhorn will automatically create snapshot/backup for the user at that time, as long as the volume is attached to a node. User can find the setting for the recurring snapshot and backup in the `Volume Detail` page. -## Other Features +## Other features -### [Multiple disks support](./docs/multidisk.md) -### [iSCSI support](./docs/iscsi.md) +### [Multiple disks](./docs/multidisk.md) +### [iSCSI](./docs/iscsi.md) ### [Restoring Stateful Set volumes](./docs/restore_statefulset.md) -### [Base Image support](./docs/base-image.md) +### [Base image](./docs/base-image.md) ## Additional informations ### [Google Kubernetes Engine](./docs/gke.md) @@ -302,7 +302,7 @@ User can find the setting for the recurring snapshot and backup in the `Volume D ## Uninstall Longhorn -Longhorn CRD has the finalizers in them, so user should delete the volumes and related resource first, give the managers a chance to clean up after them. +Longhorn store its data in the Kubernetes API server, in the format of CRD. Longhorn CRD has the finalizers in them, so user should delete the volumes and related resource first, give the managers a chance to do the clean up after them. ### 1. Clean up volume and related resources Noted that you would lose all you data after done this. It's recommended to make backups before proceeding if you intent to keep the data. @@ -341,7 +341,7 @@ kubectl -n longhorn-system get nodes.longhorn.rancher.io Make sure all reports `No resources found.` before continuing. -### 3. Uninstall Longhorn System +### 3. Uninstall Longhorn ``` kubectl delete -f https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/deploy/longhorn.yaml ``` From f8b7d723e1e259335f9882668b29044f107ce287 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 22 Aug 2018 18:41:13 -0700 Subject: [PATCH 39/44] Update gke.md --- docs/gke.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/gke.md b/docs/gke.md index 1d266f9..3864f7b 100644 --- a/docs/gke.md +++ b/docs/gke.md @@ -1,5 +1,7 @@ # Google Kubernetes Engine +The user need to use `Ubuntu` as the OS on the node, instead of `Container-Optimized OS(default)`, since the latter doesn't support `open-iscsi` which is required by Longhorn. + The configuration yaml will be slight different for Google Kubernetes Engine (GKE): 1. 
GKE requires user to manually claim himself as cluster admin to enable RBAC. User need to execute following command before create the Longhorn system using yaml files. From cbfa6ba4dfdbd5670b492f18e7b95fe822e15d95 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 22 Aug 2018 18:41:29 -0700 Subject: [PATCH 40/44] Update gke.md --- docs/gke.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/gke.md b/docs/gke.md index 3864f7b..b04914d 100644 --- a/docs/gke.md +++ b/docs/gke.md @@ -1,6 +1,6 @@ # Google Kubernetes Engine -The user need to use `Ubuntu` as the OS on the node, instead of `Container-Optimized OS(default)`, since the latter doesn't support `open-iscsi` which is required by Longhorn. +The user must use `Ubuntu` as the OS on the node, instead of `Container-Optimized OS(default)`, since the latter doesn't support `open-iscsi` which is required by Longhorn. The configuration yaml will be slight different for Google Kubernetes Engine (GKE): From 8f5d9acbcab6fe17a5008a35912acf94fb3aacdf Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 22 Aug 2018 18:41:42 -0700 Subject: [PATCH 41/44] Update gke.md --- docs/gke.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/gke.md b/docs/gke.md index b04914d..f97844d 100644 --- a/docs/gke.md +++ b/docs/gke.md @@ -1,6 +1,6 @@ # Google Kubernetes Engine -The user must use `Ubuntu` as the OS on the node, instead of `Container-Optimized OS(default)`, since the latter doesn't support `open-iscsi` which is required by Longhorn. +The user must uses `Ubuntu` as the OS on the node, instead of `Container-Optimized OS(default)`, since the latter doesn't support `open-iscsi` which is required by Longhorn. The configuration yaml will be slight different for Google Kubernetes Engine (GKE): From b892623bdc10e7235ff8c98286a431d7f3b147d1 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 22 Aug 2018 20:00:28 -0700 Subject: [PATCH 42/44] Update README.md Update environment_check.sh result. --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 02e353e..aae967e 100644 --- a/README.md +++ b/README.md @@ -41,21 +41,21 @@ curl -sSfL https://raw.githubusercontent.com/rancher/longhorn/v0.3-rc/scripts/en ``` Example result: ``` -pod/detect-flexvol-dir created -daemonset.apps/longhorn-environment-check created +pod "detect-flexvol-dir" created +daemonset.apps "longhorn-environment-check" created waiting for pod/detect-flexvol-dir to finish pod/detect-flexvol-dir completed -waiting for pods to become ready (1/7) -waiting for pods to become ready (6/7) -all pods ready (7/7) +all pods ready (3/3) - FlexVolume Path: /var/lib/kubelet/volumeplugins + FLEXVOLUME_DIR="/home/kubernetes/flexvolume" MountPropagation is enabled! +cleaning up detection workloads... pod "detect-flexvol-dir" deleted daemonset.apps "longhorn-environment-check" deleted +clean up completed ``` Please make a note of `Flexvolume Path` and `MountPropagation` state above. From 0953371646b1c651912d2b1b7ddcb5bd86d34aa0 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 22 Aug 2018 20:01:22 -0700 Subject: [PATCH 43/44] Update README.md Fix a typo. 
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index aae967e..f35da84 100644 --- a/README.md +++ b/README.md @@ -298,7 +298,7 @@ User can find the setting for the recurring snapshot and backup in the `Volume D ## Additional informations ### [Google Kubernetes Engine](./docs/gke.md) ### [Upgrade from v0.1/v0.2](./docs/upgrade.md) -### [Troubleshotting](./docs/troubleshooting.md) +### [Troubleshooting](./docs/troubleshooting.md) ## Uninstall Longhorn From d4d94cb82aab58b3c116c25bedd1e445425e64f0 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 23 Aug 2018 14:22:08 -0700 Subject: [PATCH 44/44] Sync with Longhorn manager commit ad90204cc69512b1ed3c0b544d088fa22ebbb5ce Author: Sheng Yang Date: Thu Aug 23 14:12:21 2018 -0700 Update image to rancher/longhorn-manager:v0.3.0 Engine image: rancher/longhorn-engine:v0.3.0 UI Image: rancher/longhorn-ui:v0.3.0 --- deploy/longhorn.yaml | 14 +++++++------- scripts/environment_check.sh | 8 +++++--- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/deploy/longhorn.yaml b/deploy/longhorn.yaml index 6e90798..82a16b6 100644 --- a/deploy/longhorn.yaml +++ b/deploy/longhorn.yaml @@ -178,7 +178,7 @@ spec: spec: containers: - name: longhorn-manager - image: rancher/longhorn-manager:v0.3-rc5 + image: rancher/longhorn-manager:v0.3.0 imagePullPolicy: Always securityContext: privileged: true @@ -187,9 +187,9 @@ spec: - -d - daemon - --engine-image - - rancher/longhorn-engine:v0.3-rc5 + - rancher/longhorn-engine:v0.3.0 - --manager-image - - rancher/longhorn-manager:v0.3-rc5 + - rancher/longhorn-manager:v0.3.0 - --service-account - longhorn-service-account ports: @@ -266,7 +266,7 @@ spec: spec: containers: - name: longhorn-ui - image: rancher/longhorn-ui:v0.3-rc5 + image: rancher/longhorn-ui:v0.3.0 ports: - containerPort: 8000 env: @@ -305,18 +305,18 @@ spec: spec: initContainers: - name: wait-longhorn-manager - image: rancher/longhorn-manager:v0.3-rc5 + image: rancher/longhorn-manager:v0.3.0 command: ['sh', '-c', 'while [ $(curl -m 1 -s -o /dev/null -w "%{http_code}" http://longhorn-backend:9500/v1) != "200" ]; do echo waiting; sleep 2; done'] containers: - name: longhorn-driver-deployer - image: rancher/longhorn-manager:v0.3-rc5 + image: rancher/longhorn-manager:v0.3.0 imagePullPolicy: Always command: - longhorn-manager - -d - deploy-driver - --manager-image - - rancher/longhorn-manager:v0.3-rc5 + - rancher/longhorn-manager:v0.3.0 - --manager-url - http://longhorn-backend:9500/v1 # manually choose "flexvolume" or "csi" diff --git a/scripts/environment_check.sh b/scripts/environment_check.sh index 84eb41a..3d00936 100755 --- a/scripts/environment_check.sh +++ b/scripts/environment_check.sh @@ -99,6 +99,7 @@ EOF } cleanup() { + echo "cleaning up detection workloads..." kubectl delete -f $TEMP_DIR/environment_check.yaml & a=$! 
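   # both deletes run in the background; $! captures each PID so the function can wait on them before removing $TEMP_DIR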
kubectl delete -f $TEMP_DIR/detect-flexvol-dir.yaml & @@ -106,6 +107,7 @@ cleanup() { wait $a wait $b rm -rf $TEMP_DIR + echo "clean up completed" } wait_pod_ready() { @@ -125,7 +127,7 @@ wait_pod_ready() { validate_pod() { flexvol_path=$(kubectl logs detect-flexvol-dir) - echo -e "\n FlexVolume Path: ${flexvol_path}\n" + echo -e "\n FLEXVOLUME_DIR=\"${flexvol_path}\"\n" } wait_ds_ready() { @@ -147,7 +149,7 @@ wait_ds_ready() { validate_ds() { local allSupported=true local pods=$(kubectl -l app=longhorn-environment-check get po -o json) - + for ((i=0; i<1; i++)); do local pod=$(echo $pods | jq .items[$i]) local nodeName=$(echo $pod | jq -r .spec.nodeName) @@ -162,7 +164,7 @@ validate_ds() { if [ "$allSupported" != "true" ]; then echo echo " MountPropagation is disabled on at least one node." - echo " As a result, CSI Driver and Base Image aren't supported." + echo " As a result, CSI driver and Base image cannot be supported." echo exit 1 else