diff --git a/.github/workflows/test-ocp.yml b/.github/workflows/test-ocp.yml index adaa7894..238429f9 100644 --- a/.github/workflows/test-ocp.yml +++ b/.github/workflows/test-ocp.yml @@ -48,6 +48,12 @@ jobs: OPENSHIFT_USER: ${{ secrets.OPENSHIFT_USER }} OPENSHIFT_PASSWORD: ${{ secrets.OPENSHIFT_PASSWORD }} + - name: Install Virtctl + run: | + export VERSION=$(curl https://storage.googleapis.com/kubevirt-prow/release/kubevirt/kubevirt/stable.txt) + wget https://github.com/kubevirt/kubevirt/releases/download/${VERSION}/virtctl-${VERSION}-linux-amd64 -O /tmp/virtctl + chmod +x /tmp/virtctl + - name: Execute Tests run: | export PATH=${PATH}:/tmp/ diff --git a/README.md b/README.md index 9246dd5c..59cb7f59 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ This plugin is a very opinionated OpenShift wrapper designed to simplify the exe Executed with `kube-burner-ocp`, it looks like: ```console -$ kube-burner-ocp help +$ kube-burner-ocp --help kube-burner plugin designed to be used with OpenShift clusters as a quick way to run well-known workloads Usage: @@ -29,6 +29,8 @@ Available Commands: pvc-density Runs pvc-density workload udn-density-l3-pods Runs udn-density-l3-pods workload version Print the version number of kube-burner + virt-capacity-benchmark Runs capacity-benchmark workload + virt-density Runs virt-density workload web-burner-cluster-density Runs web-burner-cluster-density workload web-burner-init Runs web-burner-init workload web-burner-node-density Runs web-burner-node-density workload @@ -86,7 +88,7 @@ kube-burner-ocp cluster-density-v2 --iterations=1 --churn-duration=2m0s --churn- ### metrics-endpoints.yaml ```yaml -- endpoint: prometheus-k8s-openshift-monitoring.apps.rook.devshift.org +- endpoint: prometheus-k8s-openshift-monitoring.apps.rook.devshift.org metrics: - metrics.yml alerts: @@ -97,7 +99,7 @@ kube-burner-ocp cluster-density-v2 --iterations=1 --churn-duration=2m0s --churn- defaultIndex: {{.ES_INDEX}} type: opensearch - endpoint: 
https://prometheus-k8s-openshift-monitoring.apps.rook.devshift.org - token: {{ .TOKEN }} + token: {{ .TOKEN }} metrics: - metrics.yml indexer: @@ -387,6 +389,71 @@ Input parameters specific to the workload: | dpdk-cores | Number of cores assigned for each DPDK pod (should fill all the isolated cores of one NUMA node) | 2 | | performance-profile | Name of the performance profile implemented on the cluster | default | + +## Virt Workloads + +This workload family is focused on Virtualization, creating different objects across the cluster. + +The different variants are: +- [virt-density](#virt-density) +- [virt-capacity-benchmark](#virt-capacity-benchmark) + +### Virt Density + +### Virt Capacity Benchmark + +Test the capacity of Virtual Machines and Volumes supported by the cluster and a specific storage class. + +#### Environment Requirements + +In order to verify that the `VirtualMachines` completed their boot and that volume resize propagated successfully, the test uses `virtctl ssh`. +Therefore, `virtctl` must be installed and available in the `PATH`. + +See the [Temporary SSH Keys](#temporary-ssh-keys) section for details on the SSH keys used for the test. + +#### Test Sequence + +The test runs a workload in a loop without deleting previously created resources. By default it will continue until a failure occurs. +Each loop is comprised of the following steps: +- Create VMs +- Resize the root and data volumes +- Restart the VMs +- Snapshot the VMs +- Migrate the VMs + +#### Tested StorageClass + +By default, the test will search for the `StorageClass` to use: + +1. Use the default `StorageClass` for Virtualization annotated with `storageclass.kubevirt.io/is-default-virt-class` +2. If it does not exist, use the general default `StorageClass` annotated with `storageclass.kubernetes.io/is-default-class` +3. If it does not exist, fail the test before starting + +To use a different one, use `--storage-class` to provide a different name. 
+ +Please note that regardless of which `StorageClass` is used, it must: +- Support Volume Expansion: `allowVolumeExpansion: true`. +- Have a corresponding `VolumeSnapshotClass` using the same provisioner + +#### Test Namespace + +All `VirtualMachines` are created in the same namespace. + +By default, the namespace is `virt-capacity-benchmark`. Set it by passing `--namespace` (or `-n`) + +#### Test Size Parameters + +Users may control the workload sizes by passing the following arguments: +- `--max-iterations` - Maximum number of iterations, or 0 (default) for infinite. In any case, the test will stop upon failure +- `--vms` - Number of VMs for each iteration (default 5) +- `--data-volume-count` - Number of data volumes for each VM (default 9) + +#### Temporary SSH Keys + +The test generates the SSH keys automatically. +By default, it stores the pair in a temporary directory. +Users may choose to store the key in a specified directory by setting `--ssh-key-path` + ## Custom Workload: Bring your own workload To kickstart kube-burner-ocp with a custom workload, `init` becomes your go-to command. This command is equipped with flags that enable to seamlessly integrate and run your personalized workloads. 
Here's a breakdown of the flags accepted by the init command: diff --git a/cmd/config/virt-capacity-benchmark/check.sh b/cmd/config/virt-capacity-benchmark/check.sh new file mode 100755 index 00000000..9794a2ab --- /dev/null +++ b/cmd/config/virt-capacity-benchmark/check.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash +COMMAND=$1 +LABEL_KEY=$2 +LABEL_VALUE=$3 +NAMESPACE=$4 +IDENTITY_FILE=$5 +REMOTE_USER=$6 +EXPECTED_ROOT_SIZE=$7 +EXPECTED_DATA_SIZE=$8 + +# Wait up to ~60 minutes +MAX_RETRIES=130 +# In the first retries use a shorter sleep +MAX_SHORT_WAITS=12 +SHORT_WAIT=5 +LONG_WAIT=30 + +if virtctl ssh --help | grep -qc "\--local-ssh " ; then + LOCAL_SSH="--local-ssh" +else + LOCAL_SSH="" +fi + +get_vms() { + local namespace=$1 + local label_key=$2 + local label_value=$3 + + local vms + vms=$(kubectl get vm -n "${namespace}" -l "${label_key}"="${label_value}" -o json | jq .items | jq -r '.[] | .metadata.name') + local ret=$? + if [ $ret -ne 0 ]; then + echo "Failed to get VM list" + exit 1 + fi + echo "${vms}" +} + +remote_command() { + local namespace=$1 + local identity_file=$2 + local remote_user=$3 + local vm_name=$4 + local command=$5 + + local output + output=$(virtctl ssh ${LOCAL_SSH} --local-ssh-opts="-o StrictHostKeyChecking=no" --local-ssh-opts="-o UserKnownHostsFile=/dev/null" -n "${namespace}" -i "${identity_file}" -c "${command}" --username "${remote_user}" "${vm_name}" 2>/dev/null) + local ret=$? + if [ $ret -ne 0 ]; then + return 1 + fi + echo "${output}" +} + +check_vm_running() { + local vm=$1 + remote_command "${NAMESPACE}" "${IDENTITY_FILE}" "${REMOTE_USER}" "${vm}" "ls" + return $? +} + +check_resize() { + local vm=$1 + + local blk_devices + blk_devices=$(remote_command "${NAMESPACE}" "${IDENTITY_FILE}" "${REMOTE_USER}" "${vm}" "lsblk --json -v --output=NAME,SIZE") + local ret=$? 
+ if [ $ret -ne 0 ]; then + return $ret + fi + + local size + size=$(echo "${blk_devices}" | jq .blockdevices | jq -r --arg name "vda" '.[] | select(.name == $name) | .size') + if [[ $size != "${EXPECTED_ROOT_SIZE}" ]]; then + return 1 + fi + + local datavolume_sizes + datavolume_sizes=$(echo "${blk_devices}" | jq .blockdevices | jq -r --arg name "vda" '.[] | select(.name != $name) | .size') + for datavolume_size in ${datavolume_sizes}; do + if [[ $datavolume_size != "${EXPECTED_DATA_SIZE}" ]]; then + return 1 + fi + done + + return 0 +} + +VMS=$(get_vms "${NAMESPACE}" "${LABEL_KEY}" "${LABEL_VALUE}") + +for vm in ${VMS}; do + for attempt in $(seq 1 $MAX_RETRIES); do + if ${COMMAND} "${vm}"; then + break + fi + if [ "${attempt}" -lt $MAX_RETRIES ]; then + if [ "${attempt}" -lt $MAX_SHORT_WAITS ]; then + sleep "${SHORT_WAIT}" + else + sleep "${LONG_WAIT}" + fi + else + echo "Failed waiting on ${COMMAND} for ${vm}" >&2 + exit 1 + fi + done + echo "${COMMAND} finished successfully for ${vm}" +done diff --git a/cmd/config/virt-capacity-benchmark/templates/resize_pvc.yml b/cmd/config/virt-capacity-benchmark/templates/resize_pvc.yml new file mode 100644 index 00000000..659823dd --- /dev/null +++ b/cmd/config/virt-capacity-benchmark/templates/resize_pvc.yml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +spec: + resources: + requests: + storage: {{ .storageSize }} diff --git a/cmd/config/virt-capacity-benchmark/templates/secret_ssh_public.yml b/cmd/config/virt-capacity-benchmark/templates/secret_ssh_public.yml new file mode 100644 index 00000000..f3eba27a --- /dev/null +++ b/cmd/config/virt-capacity-benchmark/templates/secret_ssh_public.yml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Secret +metadata: + name: "{{ .name }}-{{ .counter }}" +type: Opaque +data: + key: {{ .publicKeyPath | ReadFile | b64enc }} \ No newline at end of file diff --git a/cmd/config/virt-capacity-benchmark/templates/vm-snapshot.yml 
b/cmd/config/virt-capacity-benchmark/templates/vm-snapshot.yml new file mode 100644 index 00000000..42fbed5a --- /dev/null +++ b/cmd/config/virt-capacity-benchmark/templates/vm-snapshot.yml @@ -0,0 +1,14 @@ +apiVersion: snapshot.kubevirt.io/v1beta1 +kind: VirtualMachineSnapshot +metadata: + name: "{{ .name }}-{{ .counter }}-{{ .Replica }}" + labels: + {{range $key, $value := .snapshotLabels }} + {{ $key }}: {{ $value }} + {{end}} +spec: + deletionPolicy: delete + source: + apiGroup: kubevirt.io + kind: VirtualMachine + name: "{{ .name }}-{{ .counter }}-{{ .Replica }}" diff --git a/cmd/config/virt-capacity-benchmark/templates/vm.yml b/cmd/config/virt-capacity-benchmark/templates/vm.yml new file mode 100644 index 00000000..fa4ed10a --- /dev/null +++ b/cmd/config/virt-capacity-benchmark/templates/vm.yml @@ -0,0 +1,106 @@ +{{- $storageClassName := .storageClassName -}} +{{- $dataVolumeLabels := .dataVolumeLabels -}} +{{- $dataVolumeSize := (default "1Gi" .dataVolumeSize) -}} +{{- $name := .name -}} +{{- $counter := .counter -}} +{{- $replica := .Replica }} +{{- $accessMode := .accessMode -}} + +apiVersion: kubevirt.io/v1 +kind: VirtualMachine +metadata: + name: "{{ $name }}-{{ $counter }}-{{ $replica }}" + labels: + {{range $key, $value := .vmLabels }} + {{ $key }}: {{ $value }} + {{end}} +spec: + dataVolumeTemplates: + - metadata: + name: "{{ $name }}-{{ $counter }}-{{ $replica }}-root" + labels: + {{range $key, $value := .rootVolumeLabels }} + {{ $key }}: {{ $value }} + {{end}} + spec: + source: + registry: + url: "docker://{{ .rootDiskImage }}" + storage: + accessModes: + - {{ $accessMode }} + storageClassName: {{ .storageClassName }} + resources: + requests: + storage: {{ default "10Gi" .rootVolumeSize }} + {{ range $dataVolumeIndex := .dataVolumeCounters }} + - metadata: + name: "{{ $name }}-{{ $counter }}-{{ $replica }}-data-{{ $dataVolumeIndex }}" + labels: + {{range $key, $value := $dataVolumeLabels }} + {{ $key }}: {{ $value }} + {{end}} + spec: + source: + 
blank: {} + storage: + accessModes: + - {{ $accessMode }} + storageClassName: {{ $storageClassName }} + resources: + requests: + storage: {{ $dataVolumeSize }} + {{ end }} + running: true + template: + spec: + accessCredentials: + - sshPublicKey: + propagationMethod: + noCloud: {} + source: + secret: + secretName: "{{ .sshPublicKeySecret }}-{{ .counter }}" + architecture: amd64 + domain: + resources: + requests: + memory: {{ default "512Mi" .vmMemory }} + devices: + disks: + - disk: + bus: virtio + name: rootdisk + bootOrder: 1 + {{ range $dataVolumeIndex := .dataVolumeCounters }} + - disk: + bus: virtio + name: "data-{{ $dataVolumeIndex }}" + {{ end }} + interfaces: + - name: default + masquerade: {} + bootOrder: 2 + machine: + type: pc-q35-rhel9.4.0 + networks: + - name: default + pod: {} + volumes: + - dataVolume: + name: "{{ .name }}-{{ .counter }}-{{ .Replica }}-root" + name: rootdisk + {{ range $dataVolumeIndex := .dataVolumeCounters }} + - dataVolume: + name: "{{ $name }}-{{ $counter }}-{{ $replica }}-data-{{ $dataVolumeIndex }}" + name: "data-{{ . 
}}" + {{ end }} + - cloudInitNoCloud: + userData: | + #cloud-config + chpasswd: + expire: false + password: {{ uuidv4 }} + user: fedora + runcmd: [] + name: cloudinitdisk diff --git a/cmd/config/virt-capacity-benchmark/virt-capacity-benchmark.yml b/cmd/config/virt-capacity-benchmark/virt-capacity-benchmark.yml new file mode 100644 index 00000000..1e619bcd --- /dev/null +++ b/cmd/config/virt-capacity-benchmark/virt-capacity-benchmark.yml @@ -0,0 +1,214 @@ +{{- $kubeBurnerFQDN := "kube-burner.io" -}} +{{- $testName := "virt-capacity-benchmark" }} +{{- $nsName := .testNamespace -}} +{{- $vmCount := .vmCount -}} +{{- $vmName := $testName -}} +{{- $sshPublicKeySecretName := $testName -}} +{{- $rootVolumeSizeStr := (list (.rootVolumeSize | toString) "Gi") | join "" -}} +{{- $dataVolumeSizeStr := (list (.dataVolumeSize | toString) "Gi") | join "" -}} +{{- $resizedRootVolumeSize := add .rootVolumeSize .volumeSizeIncrement -}} +{{- $resizedDataVolumeSize := add .dataVolumeSize .volumeSizeIncrement -}} +{{- $volumeLabelKey := (list $testName "." $kubeBurnerFQDN "/volume-type") | join "" -}} +{{- $volumeLabelValueRoot := "root" -}} +{{- $volumeLabelValueData := "data" -}} +{{- $jobCounterLabelKey := (list $testName "." 
$kubeBurnerFQDN "/counter") | join "" -}} +{{- $jobCounterLabelValue := (list "counter-" (.counter | toString )) | join "" -}} +{{- $testNamespacesLabelKey := (list $kubeBurnerFQDN "/test-name") | join "" -}} +{{- $testNamespacesLabelValue := $testName -}} +{{- $metricsBaseDirectory := $testName -}} +--- +global: + measurements: + - name: vmiLatency + +metricsEndpoints: +- indexer: + type: local + metricsDirectory: "./{{ $metricsBaseDirectory }}/iteration-{{ .counter | toString }}" + +jobs: +# Run cleanup only when counter is 0 +{{ if eq (.counter | int) 0 }} +- name: start-fresh + jobType: delete + waitForDeletion: true + qps: 5 + burst: 10 + objects: + - kind: Namespace + labelSelector: + {{ $testNamespacesLabelKey }}: {{ $testNamespacesLabelValue }} +{{ end }} + +- name: create-vms + jobType: create + jobIterations: 1 + qps: 20 + burst: 20 + namespacedIterations: false + namespace: {{ $nsName }} + namespaceLabels: + {{ $testNamespacesLabelKey }}: {{ $testNamespacesLabelValue }} + # verify object count after running each job + verifyObjects: true + errorOnVerify: true + # interval between jobs execution + jobIterationDelay: 20s + # wait all VMI be in the Ready Condition + waitWhenFinished: false + podWait: true + # timeout time after waiting for all object creation + maxWaitTimeout: 2h + jobPause: 10s + cleanup: false + # Set missing key as empty to allow using default values + defaultMissingKeysWithZero: true + beforeCleanup: "./check.sh check_vm_running {{ $jobCounterLabelKey }} {{ $jobCounterLabelValue }} {{ $nsName }} {{ .privateKey }} fedora" + objects: + + - objectTemplate: templates/secret_ssh_public.yml + runOnce: true + replicas: 1 + inputVars: + name: {{ $sshPublicKeySecretName }} + counter: {{ .counter | toString }} + publicKeyPath: {{ .publicKey }} + + - objectTemplate: templates/vm.yml + replicas: {{ $vmCount }} + waitOptions: + labelSelector: + {{ $jobCounterLabelKey }}: {{ $jobCounterLabelValue }} + inputVars: + name: {{ $vmName }} + counter: {{ 
.counter | toString }} + rootDiskImage: quay.io/containerdisks/fedora:latest + storageClassName: {{ .storageClassName }} + vmLabels: + {{ $jobCounterLabelKey }}: {{ $jobCounterLabelValue }} + rootVolumeLabels: + {{ $volumeLabelKey }}: {{ $volumeLabelValueRoot }} + {{ $jobCounterLabelKey }}: {{ $jobCounterLabelValue }} + rootVolumeSize: {{ $rootVolumeSizeStr }} + dataVolumeSize: {{ $dataVolumeSizeStr }} + dataVolumeLabels: + {{ $volumeLabelKey }}: {{ $volumeLabelValueData }} + {{ $jobCounterLabelKey }}: {{ $jobCounterLabelValue }} + sshPublicKeySecret: {{ $sshPublicKeySecretName }} + dataVolumeCounters: + {{ range .dataVolumeCounters }} + - {{ . }} + {{ end }} + accessMode: {{ .skipMigrationJob | ternary "ReadWriteOnce" "ReadWriteMany"}} + +- name: resize-volumes + jobType: patch + jobIterations: 1 + jobIterationDelay: 15s + executionMode: sequential + qps: 20 + burst: 20 + waitWhenFinished: true + {{ if not .skipResizePropagationCheck }} + beforeCleanup: "./check.sh check_resize {{ $jobCounterLabelKey }} {{ $jobCounterLabelValue }} {{ $nsName }} {{ .privateKey }} fedora {{ $resizedRootVolumeSize | toString }}G {{ $resizedDataVolumeSize | toString }}G" + {{ end }} + objects: + - apiVersion: v1 + kind: PersistentVolumeClaim + labelSelector: + {{ $volumeLabelKey }}: {{ $volumeLabelValueData }} + {{ $jobCounterLabelKey }}: {{ $jobCounterLabelValue }} + patchType: "application/strategic-merge-patch+json" + objectTemplate: templates/resize_pvc.yml + inputVars: + storageSize: "{{ $resizedDataVolumeSize | toString }}Gi" + waitOptions: + customStatusPaths: + - key: '.capacity.storage' + value: "{{ $resizedDataVolumeSize | toString }}Gi" + labelSelector: + {{ $volumeLabelKey }}: {{ $volumeLabelValueData }} + {{ $jobCounterLabelKey }}: {{ $jobCounterLabelValue }} + - apiVersion: v1 + kind: PersistentVolumeClaim + labelSelector: + {{ $volumeLabelKey }}: {{ $volumeLabelValueRoot }} + {{ $jobCounterLabelKey }}: {{ $jobCounterLabelValue }} + patchType: 
"application/strategic-merge-patch+json" + objectTemplate: templates/resize_pvc.yml + inputVars: + storageSize: "{{ $resizedRootVolumeSize | toString }}Gi" + waitOptions: + customStatusPaths: + - key: '.capacity.storage' + value: "{{ $resizedRootVolumeSize | toString }}Gi" + labelSelector: + {{ $volumeLabelKey }}: {{ $volumeLabelValueRoot }} + {{ $jobCounterLabelKey }}: {{ $jobCounterLabelValue }} + +- name: restart-vms + jobType: kubevirt + qps: 20 + burst: 20 + jobIterations: 1 + maxWaitTimeout: 1h + objectDelay: 1m + objectWait: true + beforeCleanup: "./check.sh check_vm_running {{ $jobCounterLabelKey }} {{ $jobCounterLabelValue }} {{ $nsName }} {{ .privateKey }} fedora" + objects: + - kubeVirtOp: restart + labelSelector: + {{ $jobCounterLabelKey }}: {{ $jobCounterLabelValue }} + +- name: snapshot-vms + jobType: create + qps: 20 + burst: 20 + jobIterations: 1 + maxWaitTimeout: 1h + namespacedIterations: false + namespace: {{ $nsName }} + # verify object count after running each job + verifyObjects: true + errorOnVerify: true + # interval between jobs execution + jobIterationDelay: 20s + # wait all VMI be in the Ready Condition + waitWhenFinished: false + podWait: true + # timeout time after waiting for all object creation + jobPause: 10s + cleanup: false + # Set missing key as empty to allow using default values + defaultMissingKeysWithZero: true + preLoadImages: false + objects: + - objectTemplate: templates/vm-snapshot.yml + replicas: {{ $vmCount }} + inputVars: + name: {{ $vmName }} + counter: {{ .counter | toString }} + snapshotLabels: + {{ $jobCounterLabelKey }}: {{ $jobCounterLabelValue }} + waitOptions: + customStatusPaths: + - key: '(.conditions.[] | select(.type == "Ready")).status' + value: "True" + labelSelector: + {{ $jobCounterLabelKey }}: {{ $jobCounterLabelValue }} + +{{ if not .skipMigrationJob }} +- name: migrate-vms + jobType: kubevirt + qps: 20 + burst: 20 + jobIterations: 1 + maxWaitTimeout: 1h + objectDelay: 1m + waitWhenFinished: true + 
beforeCleanup: "./check.sh check_vm_running {{ $jobCounterLabelKey }} {{ $jobCounterLabelValue }} {{ $nsName }} {{ .privateKey }} fedora" + objects: + - kubeVirtOp: migrate + labelSelector: + {{ $jobCounterLabelKey }}: {{ $jobCounterLabelValue }} +{{ end }} \ No newline at end of file diff --git a/cmd/ocp.go b/cmd/ocp.go index 340b2dc1..18be8c73 100644 --- a/cmd/ocp.go +++ b/cmd/ocp.go @@ -128,6 +128,7 @@ func openShiftCmd() *cobra.Command { ocp.NewVirtDensity(&wh), ocp.ClusterHealth(), ocp.CustomWorkload(&wh), + ocp.NewVirtCapacityBenchmark(&wh), ) util.SetupCmd(ocpCmd) return ocpCmd diff --git a/common.go b/common.go index 36494e9d..a087b6d3 100644 --- a/common.go +++ b/common.go @@ -20,9 +20,11 @@ import ( "os" "strings" + k8sconnector "github.com/cloud-bulldozer/go-commons/v2/k8s-connector" ocpmetadata "github.com/cloud-bulldozer/go-commons/v2/ocp-metadata" "github.com/kube-burner/kube-burner/pkg/config" "github.com/kube-burner/kube-burner/pkg/workloads" + log "github.com/sirupsen/logrus" "github.com/spf13/cobra" ) @@ -70,3 +72,28 @@ func GatherMetadata(wh *workloads.WorkloadHelper, alerting bool) error { } return nil } + +func getK8SConnector() k8sconnector.K8SConnector { + kubeClientProvider := config.NewKubeClientProvider("", "") + _, restConfig := kubeClientProvider.DefaultClientSet() + k8sConnector, err := k8sconnector.NewK8SConnector(restConfig) + if err != nil { + log.Fatal(err) + } + return k8sConnector +} + +func generateLoopCounterSlice(length int) []string { + counter := make([]string, length) + for i := 0; i < length; i++ { + counter[i] = fmt.Sprint(i + 1) + } + return counter +} + +func roundUpToMultiple(num, multiple int) int { + if multiple == 0 { + return num // Avoid division by zero + } + return ((num + multiple - 1) / multiple) * multiple +} diff --git a/go.sum b/go.sum index 8bde2fef..ef38f2e0 100644 --- a/go.sum +++ b/go.sum @@ -348,6 +348,7 @@ github.com/openshift/custom-resource-status v1.1.2 h1:C3DL44LEbvlbItfd8mT5jWrqPf 
github.com/openshift/custom-resource-status v1.1.2/go.mod h1:DB/Mf2oTeiAmVVX1gN+NEqweonAPY0TKUwADizj8+ZA= github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= @@ -752,6 +753,8 @@ gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= +gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= +gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/inf.v0 v0.9.0/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= diff --git a/test/helpers.bash b/test/helpers.bash index b29dc09e..ae84891a 100644 --- a/test/helpers.bash +++ b/test/helpers.bash @@ -110,3 +110,29 @@ run_cmd(){ echo "$@" ${@} } + +check_metric_recorded() { + local folder=$1 + local job=$2 + local type=$3 + local metric=$4 + local m + m=$(cat ${folder}/${type}Measurement-${job}.json | jq .[0].${metric}) + if [[ ${m} -eq 0 ]]; then + echo "metric ${type}/${metric} was not recorded for ${job}" + return 1 + fi +} + 
+check_quantile_recorded() { + local folder=$1 + local job=$2 + local type=$3 + local quantileName=$4 + + MEASUREMENT=$(cat ${folder}/${type}QuantilesMeasurement-${job}.json | jq --arg name "${quantileName}" '[.[] | select(.quantileName == $name)][0].avg') + if [[ ${MEASUREMENT} -eq 0 ]]; then + echo "Quantile for ${type}/${quantileName} was not recorded for ${job}" + return 1 + fi +} diff --git a/test/test-ocp.bats b/test/test-ocp.bats index bb9f22f0..e4efb31d 100755 --- a/test/test-ocp.bats +++ b/test/test-ocp.bats @@ -6,7 +6,7 @@ load helpers.bash setup_file() { cd ocp - export BATS_TEST_TIMEOUT=600 + export BATS_TEST_TIMEOUT=1800 export ES_SERVER="$PERFSCALE_PROD_ES_SERVER" export ES_INDEX="kube-burner-ocp" trap print_events ERR @@ -122,3 +122,14 @@ teardown_file() { @test "cluster-health" { run_cmd kube-burner-ocp cluster-health } + +@test "virt-capacity-benchmark" { + VIRT_CAPACITY_BENCHMARK_STORAGE_CLASS=${VIRT_CAPACITY_BENCHMARK_STORAGE_CLASS:-oci-bv} + run_cmd kube-burner-ocp virt-capacity-benchmark --storage-class $VIRT_CAPACITY_BENCHMARK_STORAGE_CLASS --max-iterations 2 --data-volume-count 2 --vms 2 --skip-migration-job --volume-round-size 50 --skip-resize-propagation-check + local jobs=("create-vms" "restart-vms") + for job in "${jobs[@]}"; do + check_metric_recorded ./virt-capacity-benchmark/iteration-1 ${job} vmiLatency vmReadyLatency + check_quantile_recorded ./virt-capacity-benchmark/iteration-1 ${job} vmiLatency VMReady + done + oc delete namespace virt-capacity-benchmark +} diff --git a/virt-capacity-benchmark.go b/virt-capacity-benchmark.go new file mode 100644 index 00000000..15b2e7f9 --- /dev/null +++ b/virt-capacity-benchmark.go @@ -0,0 +1,151 @@ +// Copyright 2025 The Kube-burner Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ocp + +import ( + "fmt" + "os" + + k8sstorage "github.com/cloud-bulldozer/go-commons/v2/k8s-storage" + "github.com/cloud-bulldozer/go-commons/v2/ssh" + "github.com/cloud-bulldozer/go-commons/v2/virtctl" + "github.com/kube-burner/kube-burner/pkg/workloads" + log "github.com/sirupsen/logrus" + "github.com/spf13/cobra" +) + +const ( + VirtCapacityBenchmarkSSHKeyFileName = "ssh" + VirtCapacityBenchmarkTmpDirPattern = "kube-burner-capacity-benchmark-*" + virtCapacityBenchmarkTestName = "virt-capacity-benchmark" +) + +// NewVirtCapacityBenchmark holds the virt-capacity-benchmark workload +func NewVirtCapacityBenchmark(wh *workloads.WorkloadHelper) *cobra.Command { + var storageClassName string + var sshKeyPairPath string + var maxIterations int + var vmsPerIteration int + var dataVolumeCount int + var testNamespace string + var skipMigrationJob bool + var volumeRoundSize int + var skipResizePropagationCheck bool + var metricsProfiles []string + var rc int + cmd := &cobra.Command{ + Use: virtCapacityBenchmarkTestName, + Short: "Runs capacity-benchmark workload", + SilenceUsage: true, + PreRun: func(cmd *cobra.Command, args []string) { + var err error + + if !virtctl.IsInstalled() { + log.Fatalf("Failed to run virtctl. 
Check that it is installed, in PATH and working") + } + + k8sConnector := getK8SConnector() + + storageClassName, err = k8sstorage.GetStorageClassName(k8sConnector, storageClassName, true) + if err != nil { + log.Fatal(err) + } + supported, err := k8sstorage.StorageClassSupportsVolumeExpansion(k8sConnector, storageClassName) + if err != nil { + log.Fatal(err) + } + if !supported { + log.Fatalf("Storage Class [%s] does not support volume expansion", storageClassName) + } + volumeSnapshotClassName, err := k8sstorage.GetVolumeSnapshotClassNameForStorageClass(k8sConnector, storageClassName) + if err != nil { + log.Fatal(err) + } + if volumeSnapshotClassName == "" { + log.Fatalf("Could not find a corresponding VolumeSnapshotClass for StorageClass %s", storageClassName) + } + log.Infof("Running tests with Storage Class [%s]", storageClassName) + }, + Run: func(cmd *cobra.Command, args []string) { + privateKeyPath, publicKeyPath, err := ssh.GenerateSSHKeyPair(sshKeyPairPath, VirtCapacityBenchmarkTmpDirPattern, VirtCapacityBenchmarkSSHKeyFileName) + if err != nil { + log.Fatalf("Failed to generate SSH keys for the test - %v", err) + } + + rootVolumeSize := 6 + dataVolumeSize := 1 + volumeSizeIncrement := 1 + if volumeRoundSize != 0 { + log.Infof("Rounding volume sizes to be a multiple of %v", volumeRoundSize) + rootVolumeSize = roundUpToMultiple(rootVolumeSize, volumeRoundSize) + dataVolumeSize = roundUpToMultiple(dataVolumeSize, volumeRoundSize) + volumeSizeIncrement = roundUpToMultiple(volumeSizeIncrement, volumeRoundSize) + } + + if skipMigrationJob { + log.Infof("skipMigrationJob is set to true") + } + if skipResizePropagationCheck { + log.Infof("skipResizePropagationCheck is set to true") + } + + additionalVars := map[string]interface{}{ + "privateKey": privateKeyPath, + "publicKey": publicKeyPath, + "vmCount": fmt.Sprint(vmsPerIteration), + "storageClassName": storageClassName, + "testNamespace": testNamespace, + "dataVolumeCounters": 
generateLoopCounterSlice(dataVolumeCount), + "skipMigrationJob": skipMigrationJob, + "rootVolumeSize": rootVolumeSize, + "dataVolumeSize": dataVolumeSize, + "volumeSizeIncrement": volumeSizeIncrement, + "skipResizePropagationCheck": skipResizePropagationCheck, + } + + setMetrics(cmd, metricsProfiles) + + log.Infof("Running tests in Namespace [%s]", testNamespace) + counter := 0 + for { + os.Setenv("counter", fmt.Sprint(counter)) + rc = wh.RunWithAdditionalVars(cmd.Name(), additionalVars) + if rc != 0 { + log.Infof("Capacity failed in loop #%d", counter) + break + } + counter += 1 + if maxIterations > 0 && counter >= maxIterations { + log.Infof("Reached maxIterations [%d]", maxIterations) + break + } + } + }, + PostRun: func(cmd *cobra.Command, args []string) { + os.Exit(rc) + }, + } + cmd.Flags().StringVar(&storageClassName, "storage-class", "", "Name of the Storage Class to test") + cmd.Flags().StringVar(&sshKeyPairPath, "ssh-key-path", "", "Path to save the generated SSH keys - default to a temporary location") + cmd.Flags().IntVar(&maxIterations, "max-iterations", 0, "Maximum times to run the test sequence. 
Default - run until failure (0)") + cmd.Flags().IntVar(&vmsPerIteration, "vms", 5, "Number of VMs to test in each iteration") + cmd.Flags().IntVar(&dataVolumeCount, "data-volume-count", 9, "Number of data volumes per VM") + cmd.Flags().StringVarP(&testNamespace, "namespace", "n", virtCapacityBenchmarkTestName, "Namespace to run the test in") + cmd.Flags().BoolVar(&skipMigrationJob, "skip-migration-job", false, "Skip the migration job - use when the StorageClass does not support RWX") + cmd.Flags().IntVar(&volumeRoundSize, "volume-round-size", 0, "Size to round up volume sizes to - use when enforced or overridden by the StorageClass") + cmd.Flags().BoolVar(&skipResizePropagationCheck, "skip-resize-propagation-check", false, "Skip the resize propagation check - For now use when values are propagated in a base of 10 instead of 2") + cmd.Flags().StringSliceVar(&metricsProfiles, "metrics-profile", []string{"metrics-aggregated.yml"}, "Comma separated list of metrics profiles to use") + return cmd +}