chore: setup gce for e2e builds

This PR provides a basis for running e2e tests on GCE several times a day.
Once it merges, we'll need to register a matching cron event for this repo in
Drone (a sketch follows below).
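
For reference, registering that cron event with the Drone CLI might look like
the following (a sketch; the schedule is illustrative, but the cron name must
match the cron: [ nightly ] trigger added below):

# Illustrative; assumes the Drone CLI is authenticated against our Drone server.
drone cron add talos-systems/talos nightly "0 3 * * *"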

Signed-off-by: Spencer Smith <robertspencersmith@gmail.com>
Author: Spencer Smith <robertspencersmith@gmail.com> (2019-07-15 15:00:31 -04:00)
Commit: 089890f36b (parent: 88bdedf3e6)
10 changed files with 1604 additions and 83 deletions

.drone.yml

@@ -213,36 +213,6 @@ steps:
depends_on:
- talos-image
-# - name: e2e-integration
-# image: autonomy/build-container:latest
-# pull: always
-# ## Ignore errors for now, until we get the flakiness resolved in beta
-# err_ignore: true
-# environment:
-# BUILDKIT_HOST: tcp://buildkitd.ci.svc:1234
-# BINDIR: /usr/local/bin
-# PACKET_AUTH_TOKEN:
-# from_secret: packet_auth_token
-# PACKET_PROJECT_ID:
-# from_secret: packet_project_id
-# PACKET_PXE_SERVER:
-# from_secret: packet_pxe_server
-# commands:
-# - make e2e-integration
-# volumes:
-# - name: dockersock
-# path: /var/run
-# - name: tmp
-# path: /tmp
-# when:
-# event:
-# - push
-# - tag
-# branch:
-# - master
-# depends_on:
-# - basic-integration
- name: kernel
image: autonomy/build-container:latest
pull: always
@@ -367,6 +337,84 @@ volumes:
- name: tmp
temp: {}
trigger:
+cron:
+exclude: [ nightly ]
+---
+kind: pipeline
+name: e2e
+services:
+- name: docker
+image: docker:dind
+privileged: true
+command:
+- --dns=8.8.8.8
+- --dns=8.8.4.4
+- --mtu=1440
+volumes:
+- name: dockersock
+path: /var/run
+- name: dev
+path: /dev
+- name: tmp
+path: /tmp
+steps:
+- name: gce
+image: autonomy/build-container:latest
+pull: always
+environment:
+BUILDKIT_HOST: tcp://buildkitd.ci.svc:1234
+BINDIR: /usr/local/bin
+GCE_SVC_ACCT:
+from_secret: gce_svc_acct
+TAG: latest
+commands:
+- make talos-gce
+- ./hack/test/gce-setup.sh
+volumes:
+- name: dockersock
+path: /var/run
+- name: dev
+path: /dev
+- name: e2e-integration
+image: autonomy/build-container:latest
+pull: always
+environment:
+BUILDKIT_HOST: tcp://buildkitd.ci.svc:1234
+BINDIR: /usr/local/bin
+PACKET_AUTH_TOKEN:
+from_secret: packet_auth_token
+PACKET_PROJECT_ID:
+from_secret: packet_project_id
+PACKET_PXE_SERVER:
+from_secret: packet_pxe_server
+GCE_SVC_ACCT:
+from_secret: gce_svc_acct
+commands:
+- make osctl-linux
+- make e2e-integration
+volumes:
+- name: dockersock
+path: /var/run
+- name: tmp
+path: /tmp
+volumes:
+- name: dockersock
+temp: {}
+- name: dev
+host:
+path: /dev
+- name: tmp
+temp: {}
+trigger:
+cron: [ nightly ]
---
kind: pipeline
name: notify
@@ -393,3 +441,4 @@ trigger:
depends_on:
- default
+- e2e

.gitignore vendored (1 line removed)

@@ -25,5 +25,4 @@ coverage.txt
sha256sum.txt
sha512sum.txt
-**/provider-components.yaml
**/packet-cluster.yaml

Makefile

@@ -36,8 +36,8 @@ endif
BINDIR ?= ./bin
CONFORM_VERSION ?= 57c9dbd
-SHA := $(shell $(BINDIR)/gitmeta git sha)
-TAG := $(shell $(BINDIR)/gitmeta image tag)
+SHA ?= $(shell $(BINDIR)/gitmeta git sha)
+TAG ?= $(shell $(BINDIR)/gitmeta image tag)
COMMON_ARGS = --progress=plain
COMMON_ARGS += --frontend=dockerfile.v0
@@ -195,11 +195,12 @@ talos: buildkitd
.PHONY: basic-integration
basic-integration:
-@KUBERNETES_VERSION=v1.15.0 ./hack/test/$@.sh
+@KUBERNETES_VERSION=v1.15.0 TAG=$(TAG) ./hack/test/$@.sh
.PHONY: e2e
e2e-integration:
-@KUBERNETES_VERSION=v1.15.0 ./hack/test/$@.sh
+## TODO(rsmitty): Bump this k8s version back up once the bug is fixed where kubectl can't scale crds
+@KUBERNETES_VERSION=v1.14.4 TAG=latest ./hack/test/$@.sh
.PHONY: test
test: buildkitd
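
The := to ?= switch above is what lets CI override the image tag while local
builds still compute it from git metadata; for example (illustrative
invocations):

# CI pins the tag pushed by gce-setup.sh:
TAG=latest make e2e-integration
# Local runs keep deriving TAG via ./bin/gitmeta:
make basic-integration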

hack/test/basic-integration.sh

@@ -2,6 +2,7 @@
set -eou pipefail
export TALOS_IMG="docker.io/autonomy/talos:${TAG}"
export TMP="$(mktemp -d)"
+export OSCTL="${PWD}/build/osctl-linux-amd64"
export TALOSCONFIG="${TMP}/talosconfig"
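
The new OSCTL export points the shared test scripts at the locally built
client instead of whatever happens to be on the PATH; roughly (paths as in
this diff):

# make osctl-linux produces build/osctl-linux-amd64, which the scripts
# then invoke through ${OSCTL}, e.g. in osctl-cluster-create.sh:
make osctl-linux
export OSCTL="${PWD}/build/osctl-linux-amd64"
${OSCTL} cluster create --name integration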

hack/test/e2e-integration.sh

@@ -2,25 +2,30 @@
set -eou pipefail
export TALOS_IMG="docker.io/autonomy/talos:${TAG}"
export TMP="$(mktemp -d)"
+export OSCTL="${PWD}/build/osctl-linux-amd64"
export TALOSCONFIG="${TMP}/talosconfig"
export KUBECONFIG="${TMP}/kubeconfig"
## ClusterAPI Provider Talos (CAPT)
-CAPT_VERSION="0.1.0-alpha.1"
+CAPT_VERSION="0.1.0-alpha.2"
PROVIDER_COMPONENTS="https://github.com/talos-systems/cluster-api-provider-talos/releases/download/v${CAPT_VERSION}/provider-components.yaml"
KUSTOMIZE_VERSION="1.0.11"
KUSTOMIZE_URL="https://github.com/kubernetes-sigs/kustomize/releases/download/v${KUSTOMIZE_VERSION}/kustomize_${KUSTOMIZE_VERSION}_linux_amd64"
-SONOBUOY_VERSION="0.14.3"
+SONOBUOY_VERSION="0.15.0"
SONOBUOY_URL="https://github.com/heptio/sonobuoy/releases/download/v${SONOBUOY_VERSION}/sonobuoy_${SONOBUOY_VERSION}_linux_amd64.tar.gz"
## Total number of nodes we'll be waiting to come up
-NUM_NODES=4
-MASTER_IPS="147.75.91.216" #,147.75.91.217,147.75.91.218"
+NUM_NODES=6
+MASTER_IPS=""
+## GCE-specific vars
+GCE_PROJECT_NAME="talos-testbed"
+GCE_IMAGE_NAME="talos-e2e"
## Long timeout due to packet provisioning times
-TIMEOUT=900
+TIMEOUT=9000
e2e_run() {
docker run \
@@ -38,11 +43,10 @@ e2e_run() {
}
cleanup() {
-e2e_run "kubectl delete machine talos-test-cluster-master-0"
-e2e_run "kubectl scale machinedeployment talos-test-cluster-workers --replicas=0"
-e2e_run "kubectl delete machinedeployment talos-test-cluster-workers"
-e2e_run "kubectl delete cluster talos-test-cluster"
+e2e_run "kubectl delete machine talos-e2e-master-0 talos-e2e-master-1 talos-e2e-master-2
+kubectl scale machinedeployment talos-e2e-workers --replicas=0
+kubectl delete machinedeployment talos-e2e-workers
+kubectl delete cluster talos-e2e"
+${OSCTL} cluster destroy --name integration
rm -rf ${TMP}
}
@@ -51,8 +55,10 @@ trap cleanup EXIT
./hack/test/osctl-cluster-create.sh
## Drop in capi stuff
-wget -O ${PWD}/hack/test/manifests/provider-components.yaml ${PROVIDER_COMPONENTS}
+# wget --quiet -O ${PWD}/hack/test/manifests/provider-components.yaml ${PROVIDER_COMPONENTS}
sed -i "s/{{PACKET_AUTH_TOKEN}}/${PACKET_AUTH_TOKEN}/" ${PWD}/hack/test/manifests/provider-components.yaml
+sed -i "s#{{GCE_SVC_ACCT}}#${GCE_SVC_ACCT}#" ${PWD}/hack/test/manifests/capi-secrets.yaml
+cat ${PWD}/hack/test/manifests/capi-secrets.yaml
e2e_run "kubectl apply -f /e2emanifests/provider-components.yaml -f /e2emanifests/capi-secrets.yaml"
## Wait for talosconfig in cm then dump it out
@@ -64,37 +70,23 @@ e2e_run "timeout=\$((\$(date +%s) + ${TIMEOUT}))
exit 1
fi
echo 'Waiting to CAPT pod to be available...'
-sleep 5
+sleep 10
done"
## Wait for cluster-api-provider-talos-controller-manager-0 to be ready
e2e_run "kubectl wait --timeout=${TIMEOUT}s --for=condition=Ready -n cluster-api-provider-talos-system pod/cluster-api-provider-talos-controller-manager-0"
-## Create cluster and create machines in packet
-## TODO: Accept list of IPs as env var for the master-ips bit.
-git clone --branch v${CAPT_VERSION} https://github.com/talos-systems/cluster-api-provider-talos.git ${TMP}/cluster-api-provider-talos
-sed -i "s/\[x.x.x.x, y.y.y.y, z.z.z.z\]/\[${MASTER_IPS}\]/" ${TMP}/cluster-api-provider-talos/config/samples/cluster-deployment/packet/master-ips.yaml
-sed -i "s/{{PROJECT_ID}}/${PACKET_PROJECT_ID}/g; s/{{PXE_SERVER}}/${PACKET_PXE_SERVER}/g;" ${TMP}/cluster-api-provider-talos/config/samples/cluster-deployment/packet/platform-config-*.yaml
-## Download kustomize and template out capi cluster, then deploy it
-e2e_run "apt-get update && apt-get install wget
-wget -O /usr/local/bin/kustomize ${KUSTOMIZE_URL}
-chmod +x /usr/local/bin/kustomize
-kustomize build ${TMP}/cluster-api-provider-talos/config/samples/cluster-deployment/packet > /e2emanifests/packet-cluster.yaml
-kubectl apply -f /e2emanifests/packet-cluster.yaml"
+e2e_run "kubectl apply -f /e2emanifests/gce-cluster.yaml"
## Wait for talosconfig in cm then dump it out
e2e_run "timeout=\$((\$(date +%s) + ${TIMEOUT}))
-until kubectl get cm -n cluster-api-provider-talos-system talos-test-cluster-master-0
+until kubectl get cm -n cluster-api-provider-talos-system talos-e2e-master-0
do
if [[ \$(date +%s) -gt \$timeout ]]
then
exit 1
fi
-sleep 5
-done"
-e2e_run "kubectl get cm -n cluster-api-provider-talos-system talos-test-cluster-master-0 -o jsonpath='{.data.talosconfig}' > ${TALOSCONFIG}-capi"
+sleep 10
+done
+kubectl get cm -n cluster-api-provider-talos-system talos-e2e-master-0 -o jsonpath='{.data.talosconfig}' > ${TALOSCONFIG}-capi"
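
As an aside: each of these wait blocks is the same poll-until-deadline idiom.
Distilled into a standalone sketch (the wait_for helper is illustrative, not
part of this change):

# Hypothetical helper equivalent to the inline loops above:
wait_for() {
  local deadline=$(( $(date +%s) + TIMEOUT ))
  until eval "${1}"; do
    [[ $(date +%s) -gt ${deadline} ]] && return 1
    sleep 10
  done
}
wait_for "kubectl get cm -n cluster-api-provider-talos-system talos-e2e-master-0"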
## Wait for kubeconfig from capi master-0
e2e_run "timeout=\$((\$(date +%s) + ${TIMEOUT}))
@@ -104,7 +96,7 @@ e2e_run "timeout=\$((\$(date +%s) + ${TIMEOUT}))
then
exit 1
fi
-sleep 5
+sleep 10
done"
## Wait for nodes to check in
@@ -116,7 +108,7 @@ e2e_run "timeout=\$((\$(date +%s) + ${TIMEOUT}))
exit 1
fi
KUBECONFIG=${KUBECONFIG}-capi kubectl get nodes -o wide
-sleep 5
+sleep 10
done"
## Apply psp and flannel
@@ -125,22 +117,24 @@ e2e_run "KUBECONFIG=${KUBECONFIG}-capi kubectl apply -f /manifests/psp.yaml -f /
## Wait for nodes ready
e2e_run "KUBECONFIG=${KUBECONFIG}-capi kubectl wait --timeout=${TIMEOUT}s --for=condition=ready=true --all nodes"
-# ## Verify that we have an HA controlplane
-# e2e_run "timeout=\$((\$(date +%s) + ${TIMEOUT}))
-# until KUBECONFIG=${KUBECONFIG}-capi kubectl get nodes -l node-role.kubernetes.io/master='' -o json | jq '.items | length' | grep 3 > /dev/null
-# do
-# if [[ \$(date +%s) -gt \$timeout ]]
-# then
-# exit 1
-# fi
-# KUBECONFIG=${KUBECONFIG}-capi kubectl get nodes -l node-role.kubernetes.io/master='' -o json | jq '.items | length'
-# sleep 5
-# done"
+## Verify that we have an HA controlplane
+e2e_run "timeout=\$((\$(date +%s) + ${TIMEOUT}))
+until KUBECONFIG=${KUBECONFIG}-capi kubectl get nodes -l node-role.kubernetes.io/master='' -o json | jq '.items | length' | grep 3 > /dev/null
+do
+if [[ \$(date +%s) -gt \$timeout ]]
+then
+exit 1
+fi
+KUBECONFIG=${KUBECONFIG}-capi kubectl get nodes -l node-role.kubernetes.io/master='' -o json | jq '.items | length'
+sleep 10
+done"
## Download sonobuoy and run conformance
e2e_run "apt-get update && apt-get install wget
-wget -O /tmp/sonobuoy.tar.gz ${SONOBUOY_URL}
-tar -xvf /tmp/sonobuoy.tar.gz -C /usr/local/bin
-sonobuoy run --kubeconfig ${KUBECONFIG}-capi --wait --skip-preflight --kube-conformance-image-version v1.14.3 --plugin e2e"
+wget --quiet -O /tmp/sonobuoy.tar.gz ${SONOBUOY_URL}
+tar -xf /tmp/sonobuoy.tar.gz -C /usr/local/bin
+sonobuoy run --kubeconfig ${KUBECONFIG}-capi --wait --skip-preflight --plugin e2e
+results=\$(sonobuoy retrieve --kubeconfig ${KUBECONFIG}-capi)
+sonobuoy e2e --kubeconfig ${KUBECONFIG}-capi \$results"
exit 0
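
The new retrieve/e2e pair pulls the results tarball out of the cluster and
summarizes it; run by hand it would look roughly like this (a sketch; the
tarball name varies per run):

# sonobuoy status reports progress, retrieve downloads the results tarball,
# and 'sonobuoy e2e <tarball>' prints the failed conformance tests, if any.
sonobuoy status --kubeconfig "${KUBECONFIG}-capi"
results=$(sonobuoy retrieve --kubeconfig "${KUBECONFIG}-capi")
sonobuoy e2e --kubeconfig "${KUBECONFIG}-capi" "${results}"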

hack/test/gce-setup.sh (new executable file, 20 lines)

@@ -0,0 +1,20 @@
#!/bin/bash
set -eou pipefail
## Update secret with service acct info
## Setup svc acct
echo $GCE_SVC_ACCT | base64 -d > /tmp/svc-acct.json
apk add --no-cache python
wget https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-253.0.0-linux-x86_64.tar.gz
tar -xf google-cloud-sdk-253.0.0-linux-x86_64.tar.gz
./google-cloud-sdk/install.sh --disable-installation-options --quiet
./google-cloud-sdk/bin/gcloud auth activate-service-account --key-file /tmp/svc-acct.json
## Push talos-gce to storage bucket
./google-cloud-sdk/bin/gsutil cp ./build/talos-gce.tar.gz gs://talos-e2e
## Create image from talos-gce
./google-cloud-sdk/bin/gcloud --quiet --project talos-testbed compute images delete talos-e2e
./google-cloud-sdk/bin/gcloud --quiet --project talos-testbed compute images create talos-e2e --source-uri gs://talos-e2e/talos-gce.tar.gz
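
A couple of sanity checks after the upload, assuming the same authenticated
SDK install (illustrative, not part of the script):

# Confirm the tarball landed and the image was recreated:
./google-cloud-sdk/bin/gsutil ls gs://talos-e2e/
./google-cloud-sdk/bin/gcloud compute images describe talos-e2e --project talos-testbed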

hack/test/manifests/capi-secrets.yaml

@@ -10,3 +10,5 @@ kind: Secret
metadata:
name: gce-credentials
namespace: cluster-api-provider-talos-system
+data:
+service-account.json: "{{GCE_SVC_ACCT}}"
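
Kubernetes Secret data values must be base64-encoded, so the GCE_SVC_ACCT
value substituted here is the encoded service-account JSON (gce-setup.sh
decodes the same value with base64 -d). Producing it might look like this
(illustrative; svc-acct.json is a placeholder path):

export GCE_SVC_ACCT=$(base64 -w0 < svc-acct.json)
sed -i "s#{{GCE_SVC_ACCT}}#${GCE_SVC_ACCT}#" hack/test/manifests/capi-secrets.yaml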

hack/test/manifests/gce-cluster.yaml (new file, 126 lines)

@@ -0,0 +1,126 @@
apiVersion: cluster.k8s.io/v1alpha1
kind: Cluster
metadata:
annotations: null
name: talos-e2e
spec:
clusterNetwork:
pods:
cidrBlocks:
- 192.168.0.0/16
serviceDomain: cluster.local
services:
cidrBlocks:
- 10.96.0.0/12
providerSpec:
value:
apiVersion: talosproviderconfig/v1alpha1
kind: TalosClusterProviderSpec
masters:
ips:
- 35.206.100.52
- 35.208.36.204
- 35.209.145.137
---
apiVersion: cluster.k8s.io/v1alpha1
kind: Machine
metadata:
labels:
cluster.k8s.io/cluster-name: talos-e2e
set: master
name: talos-e2e-master-0
spec:
providerSpec:
value:
apiVersion: talosproviderconfig/v1alpha1
kind: TalosMachineProviderSpec
platform:
config: |-
zone: "us-central1-c"
project: "talos-testbed"
instances:
type: "n1-standard-2"
image: "https://www.googleapis.com/compute/v1/projects/talos-testbed/global/images/talos-e2e"
disks:
size: 50
type: gce
---
apiVersion: cluster.k8s.io/v1alpha1
kind: Machine
metadata:
labels:
cluster.k8s.io/cluster-name: talos-e2e
set: master
name: talos-e2e-master-1
spec:
providerSpec:
value:
apiVersion: talosproviderconfig/v1alpha1
kind: TalosMachineProviderSpec
platform:
config: |-
zone: "us-central1-c"
project: "talos-testbed"
instances:
type: "n1-standard-2"
image: "https://www.googleapis.com/compute/v1/projects/talos-testbed/global/images/talos-e2e"
disks:
size: 50
type: gce
---
apiVersion: cluster.k8s.io/v1alpha1
kind: Machine
metadata:
labels:
cluster.k8s.io/cluster-name: talos-e2e
set: master
name: talos-e2e-master-2
spec:
providerSpec:
value:
apiVersion: talosproviderconfig/v1alpha1
kind: TalosMachineProviderSpec
platform:
config: |-
zone: "us-central1-c"
project: "talos-testbed"
instances:
type: "n1-standard-2"
image: "https://www.googleapis.com/compute/v1/projects/talos-testbed/global/images/talos-e2e"
disks:
size: 50
type: gce
---
apiVersion: cluster.k8s.io/v1alpha1
kind: MachineDeployment
metadata:
labels:
cluster.k8s.io/cluster-name: talos-e2e
set: worker
name: talos-e2e-workers
spec:
replicas: 3
selector:
matchLabels:
cluster.k8s.io/cluster-name: talos-e2e
set: worker
template:
metadata:
labels:
cluster.k8s.io/cluster-name: talos-e2e
set: worker
spec:
providerSpec:
value:
apiVersion: talosproviderconfig/v1alpha1
kind: TalosMachineProviderSpec
platform:
config: |-
zone: "us-central1-c"
project: "talos-testbed"
instances:
type: "n1-standard-2"
image: "https://www.googleapis.com/compute/v1/projects/talos-testbed/global/images/talos-e2e"
disks:
size: 50
type: gce
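
Once e2e-integration.sh applies this manifest, the CAPT controller reconciles
these objects into GCE instances; progress can be followed from the bootstrap
cluster with standard cluster-api queries, e.g.:

# Illustrative; run against the cluster where CAPT is deployed:
kubectl get cluster talos-e2e
kubectl get machines -o wide --watch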

File diff suppressed because it is too large.

hack/test/osctl-cluster-create.sh

@@ -19,7 +19,7 @@ run() {
k8s.gcr.io/hyperkube:${KUBERNETES_VERSION} -c "${1}"
}
-${OSCTL} cluster create --name integration
+${OSCTL} cluster create --name integration --image ${TALOS_IMG} --mtu 1440
${OSCTL} config target 10.5.0.2
## Fetch kubeconfig