From 151c9df091f32d00748e7e5effbb2c759916e8b9 Mon Sep 17 00:00:00 2001
From: Noel Georgi
Date: Fri, 21 Jan 2022 19:07:13 +0530
Subject: [PATCH] chore: add CSI tests for e2e-qemu

Add tests that exercise Rook/Ceph as a CSI provider on the e2e-qemu
cluster.

Allow specifying CPU/memory for worker nodes separately from control
plane nodes, plus extra disks for workers, so the CSI test cluster can
be sized appropriately.

Signed-off-by: Noel Georgi
---
 .drone.jsonnet                              | 13 ++++++
 Makefile                                    |  9 +++-
 cmd/talosctl/cmd/mgmt/cluster/create.go     | 47 +++++++++++++++------
 hack/test/e2e-qemu.sh                       |  5 +++
 hack/test/e2e.sh                            | 18 ++++++++
 website/content/docs/v0.15/Reference/cli.md |  8 +++-
 6 files changed, 84 insertions(+), 16 deletions(-)

diff --git a/.drone.jsonnet b/.drone.jsonnet
index f7f08d302..85b06a378 100644
--- a/.drone.jsonnet
+++ b/.drone.jsonnet
@@ -403,6 +403,17 @@ local integration_qemu_encrypted_vip = Step("e2e-encrypted-vip", target="e2e-qem
   "IMAGE_REGISTRY": local_registry,
 });
 
+local integration_qemu_csi = Step("e2e-csi", target="e2e-qemu", privileged=true, depends_on=[load_artifacts], environment={
+  "IMAGE_REGISTRY": local_registry,
+  "SHORT_INTEGRATION_TEST": "yes",
+  "QEMU_WORKERS": "3",
+  "QEMU_CPUS_WORKERS": "4",
+  "QEMU_MEMORY_WORKERS": "5120",
+  "QEMU_EXTRA_DISKS": "1",
+  "QEMU_EXTRA_DISKS_SIZE": "12288",
+  "WITH_TEST": "run_csi_tests",
+});
+
 local integration_images = Step("images", target="images", depends_on=[load_artifacts], environment={"IMAGE_REGISTRY": local_registry});
 local integration_sbcs = Step("sbcs", target="sbcs", depends_on=[integration_images], environment={"IMAGE_REGISTRY": local_registry});
 
@@ -443,6 +454,7 @@ local integration_pipelines = [
   Pipeline('integration-misc', default_pipeline_steps + [integration_gvisor, integration_cilium, integration_uefi, integration_disk_image, integration_canal_reset, integration_no_cluster_discovery, integration_kubespan]) + integration_trigger(['integration-misc']),
   Pipeline('integration-qemu-encrypted-vip', default_pipeline_steps + [integration_qemu_encrypted_vip]) + integration_trigger(['integration-qemu-encrypted-vip']),
   Pipeline('integration-qemu-race', default_pipeline_steps + [build_race, integration_qemu_race]) + integration_trigger(['integration-qemu-race']),
+  Pipeline('integration-qemu-csi', default_pipeline_steps + [integration_qemu_csi]) + integration_trigger(['integration-qemu-csi']),
   Pipeline('integration-images', default_pipeline_steps + [integration_images, integration_sbcs]) + integration_trigger(['integration-images']),
 
   // cron pipelines, triggered on schedule events
@@ -453,6 +465,7 @@ local integration_pipelines = [
   Pipeline('cron-integration-misc', default_pipeline_steps + [integration_gvisor, integration_cilium, integration_uefi, integration_disk_image, integration_canal_reset, integration_no_cluster_discovery, integration_kubespan], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
   Pipeline('cron-integration-qemu-encrypted-vip', default_pipeline_steps + [integration_qemu_encrypted_vip], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
   Pipeline('cron-integration-qemu-race', default_pipeline_steps + [build_race, integration_qemu_race], [default_cron_pipeline]) + cron_trigger(['nightly']),
+  Pipeline('cron-integration-qemu-csi', default_pipeline_steps + [integration_qemu_csi], [default_cron_pipeline]) + cron_trigger(['nightly']),
   Pipeline('cron-integration-images', default_pipeline_steps + [integration_images, integration_sbcs], [default_cron_pipeline]) + cron_trigger(['nightly']),
 ];
 
diff --git a/Makefile b/Makefile
index 70f7ca707..41f84457c 100644
--- a/Makefile
+++ b/Makefile
@@ -28,6 +28,7 @@ TALOSCTL_DEFAULT_TARGET := talosctl-$(OPERATING_SYSTEM)
 INTEGRATION_TEST_DEFAULT_TARGET := integration-test-$(OPERATING_SYSTEM)
 INTEGRATION_TEST_PROVISION_DEFAULT_TARGET := integration-test-provision-$(OPERATING_SYSTEM)
 KUBECTL_URL ?= https://storage.googleapis.com/kubernetes-release/release/v1.23.3/bin/$(OPERATING_SYSTEM)/amd64/kubectl
+KUBESTR_URL ?= https://github.com/kastenhq/kubestr/releases/download/v0.4.31/kubestr_0.4.31_Linux_amd64.tar.gz
 CLUSTERCTL_VERSION ?= 1.0.2
 CLUSTERCTL_URL ?= https://github.com/kubernetes-sigs/cluster-api/releases/download/v$(CLUSTERCTL_VERSION)/clusterctl-$(OPERATING_SYSTEM)-amd64
 TESTPKGS ?= github.com/talos-systems/talos/...
@@ -307,12 +308,17 @@ $(ARTIFACTS)/kubectl:
 	@curl -L -o $(ARTIFACTS)/kubectl "$(KUBECTL_URL)"
 	@chmod +x $(ARTIFACTS)/kubectl
 
+$(ARTIFACTS)/kubestr:
+	@mkdir -p $(ARTIFACTS)
+	@curl -L "$(KUBESTR_URL)" | tar xzf - -C $(ARTIFACTS) kubestr
+	@chmod +x $(ARTIFACTS)/kubestr
+
 $(ARTIFACTS)/clusterctl:
 	@mkdir -p $(ARTIFACTS)
 	@curl -L -o $(ARTIFACTS)/clusterctl "$(CLUSTERCTL_URL)"
 	@chmod +x $(ARTIFACTS)/clusterctl
 
-e2e-%: $(ARTIFACTS)/$(INTEGRATION_TEST_DEFAULT_TARGET)-amd64 $(ARTIFACTS)/kubectl $(ARTIFACTS)/clusterctl ## Runs the E2E test for the specified platform (e.g. e2e-docker).
+e2e-%: $(ARTIFACTS)/$(INTEGRATION_TEST_DEFAULT_TARGET)-amd64 $(ARTIFACTS)/kubectl $(ARTIFACTS)/clusterctl $(ARTIFACTS)/kubestr ## Runs the E2E test for the specified platform (e.g. e2e-docker).
 	@$(MAKE) hack-test-$@ \
 		PLATFORM=$* \
 		TAG=$(TAG) \
@@ -326,6 +332,7 @@ e2e-%: $(ARTIFACTS)/$(INTEGRATION_TEST_DEFAULT_TARGET)-amd64 $(ARTIFACTS)/kubect
 		SHORT_INTEGRATION_TEST=$(SHORT_INTEGRATION_TEST) \
 		CUSTOM_CNI_URL=$(CUSTOM_CNI_URL) \
 		KUBECTL=$(PWD)/$(ARTIFACTS)/kubectl \
+		KUBESTR=$(PWD)/$(ARTIFACTS)/kubestr \
 		CLUSTERCTL=$(PWD)/$(ARTIFACTS)/clusterctl
 
 provision-tests-prepare: release-artifacts $(ARTIFACTS)/$(INTEGRATION_TEST_PROVISION_DEFAULT_TARGET)-amd64
diff --git a/cmd/talosctl/cmd/mgmt/cluster/create.go b/cmd/talosctl/cmd/mgmt/cluster/create.go
index fde426ee4..967cd2699 100644
--- a/cmd/talosctl/cmd/mgmt/cluster/create.go
+++ b/cmd/talosctl/cmd/mgmt/cluster/create.go
@@ -80,10 +80,14 @@ var (
 	dnsDomain          string
 	workers            int
 	masters            int
-	clusterCpus        string
-	clusterMemory      int
+	controlPlaneCpus   string
+	workersCpus        string
+	controlPlaneMemory int
+	workersMemory      int
 	clusterDiskSize    int
 	clusterDisks       []string
+	extraDisks         int
+	extraDiskSize      int
 	targetArch         string
 	clusterWait        bool
 	clusterWaitTimeout time.Duration
@@ -131,12 +135,18 @@ func create(ctx context.Context) (err error) {
 		return fmt.Errorf("number of masters can't be less than 1")
 	}
 
-	nanoCPUs, err := parseCPUShare()
+	controlPlaneNanoCPUs, err := parseCPUShare(controlPlaneCpus)
 	if err != nil {
 		return fmt.Errorf("error parsing --cpus: %s", err)
 	}
 
-	memory := int64(clusterMemory) * 1024 * 1024
+	workerNanoCPUs, err := parseCPUShare(workersCpus)
+	if err != nil {
+		return fmt.Errorf("error parsing --cpus-workers: %s", err)
+	}
+
+	controlPlaneMemoryBytes := int64(controlPlaneMemory) * 1024 * 1024
+	workerMemoryBytes := int64(workersMemory) * 1024 * 1024
 
 	// Validate CIDR range and allocate IPs
 	fmt.Println("validating CIDR and reserving IPs")
@@ -518,8 +528,8 @@ func create(ctx context.Context) (err error) {
 			Name:                fmt.Sprintf("%s-master-%d", clusterName, i+1),
 			Type:                machine.TypeControlPlane,
 			IPs:                 nodeIPs,
-			Memory:              memory,
-			NanoCPUs:            nanoCPUs,
+			Memory:              controlPlaneMemoryBytes,
+			NanoCPUs:            controlPlaneNanoCPUs,
 			Disks:               disks,
 			SkipInjectingConfig: skipInjectingConfig,
 			BadRTC:              badRTC,
@@ -548,6 +558,13 @@ func create(ctx context.Context) (err error) {
 		request.Nodes = append(request.Nodes, nodeReq)
 	}
 
+	// append extra disks for worker nodes only (control plane node requests are already built above)
+	for i := 0; i < extraDisks; i++ {
+		disks = append(disks, &provision.Disk{
+			Size: uint64(extraDiskSize) * 1024 * 1024,
+		})
+	}
+
 	for i := 1; i <= workers; i++ {
 		name := fmt.Sprintf("%s-worker-%d", clusterName, i)
 
@@ -570,8 +587,8 @@ func create(ctx context.Context) (err error) {
 			Name:                name,
 			Type:                machine.TypeWorker,
 			IPs:                 nodeIPs,
-			Memory:              memory,
-			NanoCPUs:            nanoCPUs,
+			Memory:              workerMemoryBytes,
+			NanoCPUs:            workerNanoCPUs,
 			Disks:               disks,
 			Config:              cfg,
 			SkipInjectingConfig: skipInjectingConfig,
@@ -705,10 +722,10 @@ func mergeKubeconfig(ctx context.Context, clusterAccess *access.Adapter) error {
 	return merger.Write(kubeconfigPath)
 }
 
-func parseCPUShare() (int64, error) {
-	cpu, ok := new(big.Rat).SetString(clusterCpus)
+func parseCPUShare(cpus string) (int64, error) {
+	cpu, ok := new(big.Rat).SetString(cpus)
 	if !ok {
-		return 0, fmt.Errorf("failed to parsing as a rational number: %s", clusterCpus)
+		return 0, fmt.Errorf("failed to parse as a rational number: %s", cpus)
 	}
 
 	nano := cpu.Mul(cpu, big.NewRat(1e9, 1))
@@ -808,10 +825,14 @@ func init() {
 	createCmd.Flags().StringSliceVar(&nameservers, "nameservers", []string{"8.8.8.8", "1.1.1.1", "2001:4860:4860::8888", "2606:4700:4700::1111"}, "list of nameservers to use")
 	createCmd.Flags().IntVar(&workers, "workers", 1, "the number of workers to create")
 	createCmd.Flags().IntVar(&masters, "masters", 1, "the number of masters to create")
-	createCmd.Flags().StringVar(&clusterCpus, "cpus", "2.0", "the share of CPUs as fraction (each container/VM)")
-	createCmd.Flags().IntVar(&clusterMemory, "memory", 2048, "the limit on memory usage in MB (each container/VM)")
+	createCmd.Flags().StringVar(&controlPlaneCpus, "cpus", "2.0", "the share of CPUs as fraction (each control plane node/VM)")
+	createCmd.Flags().StringVar(&workersCpus, "cpus-workers", "2.0", "the share of CPUs as fraction (each worker node/VM)")
+	createCmd.Flags().IntVar(&controlPlaneMemory, "memory", 2048, "the limit on memory usage in MB (each control plane node/VM)")
+	createCmd.Flags().IntVar(&workersMemory, "memory-workers", 2048, "the limit on memory usage in MB (each worker node/VM)")
 	createCmd.Flags().IntVar(&clusterDiskSize, "disk", 6*1024, "default limit on disk size in MB (each VM)")
 	createCmd.Flags().StringSliceVar(&clusterDisks, "user-disk", []string{}, "list of disks to create for each VM in format: <mount_point1>:<size1>:<mount_point2>:<size2>")
+	createCmd.Flags().IntVar(&extraDisks, "extra-disks", 0, "number of extra disks to create for each worker VM")
+	createCmd.Flags().IntVar(&extraDiskSize, "extra-disks-size", 5*1024, "size of each extra disk in MB (each worker VM)")
 	createCmd.Flags().StringVar(&targetArch, "arch", stdruntime.GOARCH, "cluster architecture")
 	createCmd.Flags().BoolVar(&clusterWait, "wait", true, "wait for the cluster to be ready before returning")
 	createCmd.Flags().DurationVar(&clusterWaitTimeout, "wait-timeout", 20*time.Minute, "timeout to wait for the cluster to be ready")
diff --git a/hack/test/e2e-qemu.sh b/hack/test/e2e-qemu.sh
index 80db92fc8..ecbe94db5 100755
--- a/hack/test/e2e-qemu.sh
+++ b/hack/test/e2e-qemu.sh
@@ -91,9 +91,14 @@ function create_cluster {
     --kubernetes-version=${KUBERNETES_VERSION} \
     --masters=3 \
     --workers="${QEMU_WORKERS:-1}" \
+    --disk=15360 \
+    --extra-disks="${QEMU_EXTRA_DISKS:-0}" \
+    --extra-disks-size="${QEMU_EXTRA_DISKS_SIZE:-5120}" \
     --mtu=1450 \
     --memory=2048 \
+    --memory-workers="${QEMU_MEMORY_WORKERS:-2048}" \
     --cpus="${QEMU_CPUS:-2}" \
+    --cpus-workers="${QEMU_CPUS_WORKERS:-2}" \
     --cidr=172.20.1.0/24 \
     --user-disk=/var/lib/extra:100MB \
     --user-disk=/var/lib/p1:100MB:/var/lib/p2:100MB \
diff --git a/hack/test/e2e.sh b/hack/test/e2e.sh
index ac04c94e2..4ed172578 100755
--- a/hack/test/e2e.sh
+++ b/hack/test/e2e.sh
@@ -217,3 +217,21 @@ function run_gvisor_test {
   sleep 10
   ${KUBECTL} wait --for=condition=ready pod nginx-gvisor --timeout=1m
 }
+
+function run_csi_tests {
+  rm -rf "${TMP}/rook"
+  git clone --depth=1 --single-branch --branch v1.8.2 https://github.com/rook/rook.git "${TMP}/rook"
+  pushd "${TMP}/rook/deploy/examples"
+  ${KUBECTL} apply -f crds.yaml -f common.yaml -f operator.yaml
+  ${KUBECTL} apply -f cluster.yaml
+  # wait for the controller to populate the status field
+  sleep 30
+  ${KUBECTL} --namespace rook-ceph wait --timeout=900s --for=jsonpath='{.status.phase}=Ready' cephclusters.ceph.rook.io/rook-ceph
+  ${KUBECTL} --namespace rook-ceph wait --timeout=900s --for=jsonpath='{.status.state}=Created' cephclusters.ceph.rook.io/rook-ceph
+  # .status.ceph is only populated later
+  sleep 60
+  ${KUBECTL} --namespace rook-ceph wait --timeout=900s --for=jsonpath='{.status.ceph.health}=HEALTH_OK' cephclusters.ceph.rook.io/rook-ceph
+  ${KUBECTL} create -f csi/rbd/storageclass.yaml
+  # hack until https://github.com/kastenhq/kubestr/issues/101 is addressed
+  KUBERNETES_SERVICE_HOST= KUBECONFIG="${TMP}/kubeconfig" ${KUBESTR} fio --storageclass rook-ceph-block --size 10G
+}
diff --git a/website/content/docs/v0.15/Reference/cli.md b/website/content/docs/v0.15/Reference/cli.md
index efcf23864..3ef2a7c35 100644
--- a/website/content/docs/v0.15/Reference/cli.md
+++ b/website/content/docs/v0.15/Reference/cli.md
@@ -97,7 +97,8 @@ talosctl cluster create [flags]
       --config-patch string                  patch generated machineconfigs (applied to all node types)
       --config-patch-control-plane string    patch generated machineconfigs (applied to 'init' and 'controlplane' types)
       --config-patch-worker string           patch generated machineconfigs (applied to 'worker' type)
-      --cpus string                          the share of CPUs as fraction (each container/VM) (default "2.0")
+      --cpus string                          the share of CPUs as fraction (each control plane node/VM) (default "2.0")
+      --cpus-workers string                  the share of CPUs as fraction (each worker node/VM) (default "2.0")
       --crashdump                            print debug crashdump to stderr when cluster startup fails
       --custom-cni-url string                install custom CNI from the URL (Talos cluster)
       --disk int                             default limit on disk size in MB (each VM) (default 6144)
@@ -109,6 +110,8 @@ talosctl cluster create [flags]
       --endpoint string                      use endpoint instead of provider defaults
   -p, --exposed-ports string                 Comma-separated list of ports/protocols to expose on init node. Ex -p <hostPort>:<containerPort>/<protocol> (Docker provisioner only)
       --extra-boot-kernel-args string        add extra kernel args to the initial boot from vmlinuz and initramfs (QEMU only)
+      --extra-disks int                      number of extra disks to create for each worker VM
+      --extra-disks-size int                 size of each extra disk in MB (each worker VM) (default 5120)
   -h, --help                                 help for create
       --image string                         the image to use (default "ghcr.io/talos-systems/talos:latest")
       --init-node-as-endpoint                use init node as endpoint instead of any load balancer endpoint
@@ -120,7 +123,8 @@ talosctl cluster create [flags]
      --iso-path string                      the ISO path to use for the initial boot (VM only)
       --kubernetes-version string            desired kubernetes version to run (default "1.23.3")
       --masters int                          the number of masters to create (default 1)
-      --memory int                           the limit on memory usage in MB (each container/VM) (default 2048)
+      --memory int                           the limit on memory usage in MB (each control plane node/VM) (default 2048)
+      --memory-workers int                   the limit on memory usage in MB (each worker node/VM) (default 2048)
       --mtu int                              MTU of the cluster network (default 1500)
       --nameservers strings                  list of nameservers to use (default [8.8.8.8,1.1.1.1,2001:4860:4860::8888,2606:4700:4700::1111])
       --registry-insecure-skip-verify strings   list of registry hostnames to skip TLS verification for
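---

Testing notes: the new CSI suite can be run locally by mirroring the Drone
step above. This is a sketch, not the canonical invocation: it assumes a
Linux host with QEMU and the repo's usual e2e prerequisites, and it omits
IMAGE_REGISTRY, which the CI step points at its local registry. The
environment variables are inherited by hack/test/e2e-qemu.sh through make,
and the e2e-% target now also fetches kubestr into $(ARTIFACTS) first:

    QEMU_WORKERS=3 QEMU_CPUS_WORKERS=4 QEMU_MEMORY_WORKERS=5120 \
    QEMU_EXTRA_DISKS=1 QEMU_EXTRA_DISKS_SIZE=12288 \
    SHORT_INTEGRATION_TEST=yes WITH_TEST=run_csi_tests \
    make e2e-qemu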
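The new worker sizing flags can also be exercised directly. A minimal
sketch with the same values as the CI step; --provisioner is the
pre-existing flag selecting the QEMU provisioner, which is Linux-only
and is normally run under sudo per the Talos QEMU docs:

    sudo --preserve-env=HOME talosctl cluster create \
      --provisioner=qemu \
      --workers=3 \
      --cpus-workers=4.0 \
      --memory-workers=5120 \
      --extra-disks=1 \
      --extra-disks-size=12288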