From fd4dd9d3ccffbb010f2bc452b35e455a46c91702 Mon Sep 17 00:00:00 2001 From: Drew Wells Date: Mon, 23 Dec 2024 21:04:01 -0600 Subject: [PATCH] DEVOPS-30046 optional sidecar probes Make the probes optional as a fallback that the liveness probe failure issues are not resolved. --- cmd/main.go | 15 ++++-- helm/db-controller/templates/deployment.yaml | 2 + helm/db-controller/values.yaml | 6 +++ internal/webhook/dbproxy.go | 48 ++++++++++------- internal/webhook/dbproxy_test.go | 5 +- internal/webhook/defaulter.go | 38 +++++++------ internal/webhook/dsnexec.go | 57 ++++++++++++++------ internal/webhook/dsnexec_test.go | 4 +- internal/webhook/suite_test.go | 10 ++-- 9 files changed, 124 insertions(+), 61 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index 34a3944d..94960e09 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -95,6 +95,8 @@ func main() { var metricsConfigYamlPath string var enableDBProxyWebhook bool var enableDeprecatedConversionWebhook bool + var enableSidecarLivenessProbe bool + var enableSidecarReadinessProbe bool flag.StringVar(&class, "class", "default", "The class of claims this db-controller instance needs to address.") @@ -105,6 +107,9 @@ func main() { flag.BoolVar(&enableDBProxyWebhook, "enable-db-proxy", false, "Enable DB Proxy webhook. See docs for usage: https://infobloxopen.github.io/db-controller/#quick-start") flag.BoolVar(&enableDeprecatedConversionWebhook, "enable-deprecation-conversion-webhook", false, "Enable conversion of deprecated pods using dbproxy and/or dsnexec annotations") + flag.BoolVar(&enableSidecarLivenessProbe, "enable-sidecar-liveness-probe", false, "Enable liveness probe for dbproxy and dsnexec sidecars") + flag.BoolVar(&enableSidecarReadinessProbe, "enable-sidecar-readiness-probe", false, "Enable readiness probe for dbproxy and dsnexec sidecars") + opts := zap.Options{ Development: true, } @@ -257,10 +262,12 @@ func main() { if enableDBProxyWebhook { if err := mutating.SetupWebhookWithManager(mgr, mutating.SetupConfig{ - Namespace: namespace, - Class: class, - DBProxyImg: os.Getenv("DBPROXY_IMAGE"), - DSNExecImg: os.Getenv("DSNEXEC_IMAGE"), + Namespace: namespace, + Class: class, + DBProxyImg: os.Getenv("DBPROXY_IMAGE"), + DSNExecImg: os.Getenv("DSNEXEC_IMAGE"), + EnableReady: enableSidecarReadinessProbe, + EnableLiveness: enableSidecarLivenessProbe, }); err != nil { setupLog.Error(err, "failed to setup webhooks") os.Exit(1) diff --git a/helm/db-controller/templates/deployment.yaml b/helm/db-controller/templates/deployment.yaml index 32ed82ff..887d8b4a 100644 --- a/helm/db-controller/templates/deployment.yaml +++ b/helm/db-controller/templates/deployment.yaml @@ -56,6 +56,8 @@ spec: - -zap-encoder={{ .Values.zapLogger.encoding }} - -zap-log-level={{ .Values.zapLogger.level }} - -zap-time-encoding={{ .Values.zapLogger.timeEncoding }} + - --enable-sidecar-liveness-probe={{ .Values.probes.liveness.enabled }} + - --enable-sidecar-readiness-probe={{ .Values.probes.readiness.enabled }} command: - /manager image: "{{ .Values.image.repository }}:{{ default .Chart.AppVersion .Values.image.tag }}" diff --git a/helm/db-controller/values.yaml b/helm/db-controller/values.yaml index f16f413d..a8a9ef71 100644 --- a/helm/db-controller/values.yaml +++ b/helm/db-controller/values.yaml @@ -187,5 +187,11 @@ controllerConfig: ib_env: "{{ .Values.env }}" ib_lifecycle: "{{ .Values.lifecycle }}" +probes: + liveness: + enabled: false + readiness: + enabled: true + dashboard: enabled: false diff --git a/internal/webhook/dbproxy.go b/internal/webhook/dbproxy.go index 3d9edbf2..2aca4243 100644 --- a/internal/webhook/dbproxy.go +++ b/internal/webhook/dbproxy.go @@ -13,7 +13,7 @@ var ( ContainerNameProxy = "dbproxy" ) -func mutatePodProxy(ctx context.Context, pod *corev1.Pod, secretName string, dbProxyImg string) error { +func mutatePodProxy(ctx context.Context, pod *corev1.Pod, secretName string, dbProxyImg string, enableReady, enableLiveness bool) error { if pod.Annotations == nil { pod.Annotations = map[string]string{} @@ -52,20 +52,9 @@ func mutatePodProxy(ctx context.Context, pod *corev1.Pod, secretName string, dbP return nil } - pod.Spec.Containers = append(pod.Spec.Containers, corev1.Container{ - Name: ContainerNameProxy, - Image: dbProxyImg, - ImagePullPolicy: corev1.PullIfNotPresent, - - Env: []corev1.EnvVar{ - { - Name: "DBPROXY_CREDENTIAL", - Value: fmt.Sprintf("/dbproxy/%s", SecretKey), - }, - }, - // Test pgbouncer - ReadinessProbe: &corev1.Probe{ - + var readinessProbe *corev1.Probe + if enableReady { + readinessProbe = &corev1.Probe{ ProbeHandler: corev1.ProbeHandler{ Exec: &corev1.ExecAction{ Command: []string{ @@ -78,11 +67,13 @@ func mutatePodProxy(ctx context.Context, pod *corev1.Pod, secretName string, dbP InitialDelaySeconds: 5, PeriodSeconds: 15, TimeoutSeconds: 5, - }, - // FIXME: turn these back on when timeouts can be tuned. It was restarting - // the pod too often. - // Test connection to upstream database - LivenessProbe: &corev1.Probe{ + } + } + + var livenessProbe *corev1.Probe + + if enableLiveness { + livenessProbe = &corev1.Probe{ ProbeHandler: corev1.ProbeHandler{ Exec: &corev1.ExecAction{ Command: []string{ @@ -95,7 +86,24 @@ func mutatePodProxy(ctx context.Context, pod *corev1.Pod, secretName string, dbP InitialDelaySeconds: 30, PeriodSeconds: 15, TimeoutSeconds: 5, + } + } + + pod.Spec.Containers = append(pod.Spec.Containers, corev1.Container{ + Name: ContainerNameProxy, + Image: dbProxyImg, + ImagePullPolicy: corev1.PullIfNotPresent, + + Env: []corev1.EnvVar{ + { + Name: "DBPROXY_CREDENTIAL", + Value: fmt.Sprintf("/dbproxy/%s", SecretKey), + }, }, + // Test connection to upstream database + LivenessProbe: livenessProbe, + // Test pgbouncer + ReadinessProbe: readinessProbe, VolumeMounts: []corev1.VolumeMount{ { Name: VolumeNameProxy, diff --git a/internal/webhook/dbproxy_test.go b/internal/webhook/dbproxy_test.go index c29defd3..e5cd5ea4 100644 --- a/internal/webhook/dbproxy_test.go +++ b/internal/webhook/dbproxy_test.go @@ -128,6 +128,9 @@ var _ = Describe("dbproxy defaulting", func() { Expect(sidecar.VolumeMounts[0].Name).To(Equal(VolumeNameProxy)) Expect(sidecar.VolumeMounts[0].MountPath).To(Equal(MountPathProxy)) Expect(sidecar.VolumeMounts[0].ReadOnly).To(BeTrue()) + Expect(sidecar.ReadinessProbe).ToNot(BeNil()) + Expect(sidecar.LivenessProbe).ToNot(BeNil()) + }) It("pre-mutated pods are not re-mutated", func() { @@ -147,7 +150,7 @@ var _ = Describe("dbproxy defaulting", func() { func makeMutatedPodProxy(name, claimName, secretName string) *corev1.Pod { pod := makePodProxy(name, claimName) - Expect(mutatePodProxy(context.TODO(), pod, secretName, sidecarImageProxy)).To(Succeed()) + Expect(mutatePodProxy(context.TODO(), pod, secretName, sidecarImageProxy, true, true)).To(Succeed()) return pod } diff --git a/internal/webhook/defaulter.go b/internal/webhook/defaulter.go index 9ae70c81..3f0ef5fa 100644 --- a/internal/webhook/defaulter.go +++ b/internal/webhook/defaulter.go @@ -33,18 +33,22 @@ var ( // podAnnotator annotates Pods type podAnnotator struct { - class string - namespace string - dbProxyImg string - dsnExecImg string - k8sClient client.Reader + class string + namespace string + dbProxyImg string + dsnExecImg string + k8sClient client.Reader + enableReady bool + enableLiveness bool } type SetupConfig struct { - Namespace string - Class string - DBProxyImg string - DSNExecImg string + Namespace string + Class string + DBProxyImg string + DSNExecImg string + EnableReady bool + EnableLiveness bool } func SetupWebhookWithManager(mgr ctrl.Manager, cfg SetupConfig) error { @@ -68,11 +72,13 @@ func SetupWebhookWithManager(mgr ctrl.Manager, cfg SetupConfig) error { return ctrl.NewWebhookManagedBy(mgr). For(&corev1.Pod{}). WithDefaulter(&podAnnotator{ - namespace: cfg.Namespace, - class: cfg.Class, - dbProxyImg: cfg.DBProxyImg, - dsnExecImg: cfg.DSNExecImg, - k8sClient: mgr.GetClient(), + namespace: cfg.Namespace, + class: cfg.Class, + dbProxyImg: cfg.DBProxyImg, + dsnExecImg: cfg.DSNExecImg, + k8sClient: mgr.GetClient(), + enableReady: cfg.EnableReady, + enableLiveness: cfg.EnableLiveness, }). Complete() } @@ -133,13 +139,13 @@ func (p *podAnnotator) Default(ctx context.Context, obj runtime.Object) error { if pod.Labels[LabelCheckProxy] == "enabled" && pod.Annotations[AnnotationInjectedProxy] != "true" { - err = mutatePodProxy(ctx, pod, secretName, p.dbProxyImg) + err = mutatePodProxy(ctx, pod, secretName, p.dbProxyImg, p.enableReady, p.enableLiveness) if err == nil { log.Info("mutated_pod_dbproxy") } } else if pod.Labels[LabelCheckExec] == "enabled" && pod.Annotations[AnnotationInjectedExec] != "true" { - err = mutatePodExec(ctx, pod, secretName, p.dsnExecImg) + err = mutatePodExec(ctx, pod, secretName, p.dsnExecImg, p.enableReady, p.enableLiveness) if err == nil { log.Info("mutated_pod_dsnexec") } diff --git a/internal/webhook/dsnexec.go b/internal/webhook/dsnexec.go index 62bbce45..3418496f 100644 --- a/internal/webhook/dsnexec.go +++ b/internal/webhook/dsnexec.go @@ -16,7 +16,7 @@ var ( VolumeNameExecConfig = "dsnexec-config" ) -func mutatePodExec(ctx context.Context, pod *corev1.Pod, secretName string, dsnExecImg string) error { +func mutatePodExec(ctx context.Context, pod *corev1.Pod, secretName string, dsnExecImg string, enableReady, enableLiveness bool) error { if pod.Annotations == nil { pod.Annotations = map[string]string{} @@ -75,6 +75,44 @@ func mutatePodExec(ctx context.Context, pod *corev1.Pod, secretName string, dsnE return nil } + var readinessProbe *corev1.Probe + if enableReady { + readinessProbe = &corev1.Probe{ + + ProbeHandler: corev1.ProbeHandler{ + Exec: &corev1.ExecAction{ + Command: []string{ + "/bin/sh", + "-c", + "psql -h localhost -c \"SELECT 1\"", + }, + }, + }, + InitialDelaySeconds: 5, + PeriodSeconds: 15, + TimeoutSeconds: 5, + } + } + + var livenessProbe *corev1.Probe + + if enableLiveness { + livenessProbe = &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + Exec: &corev1.ExecAction{ + Command: []string{ + "/bin/sh", + "-c", + fmt.Sprintf("psql \"$(cat /dbproxy/%s)\" -c \"SELECT 1\"", SecretKey), + }, + }, + }, + InitialDelaySeconds: 30, + PeriodSeconds: 15, + TimeoutSeconds: 5, + } + } + pod.Spec.Containers = append(pod.Spec.Containers, corev1.Container{ Name: ContainerNameExec, Image: dsnExecImg, @@ -91,20 +129,9 @@ func mutatePodExec(ctx context.Context, pod *corev1.Pod, secretName string, dsnE }, }, // Test connection to upstream database - LivenessProbe: &corev1.Probe{ - ProbeHandler: corev1.ProbeHandler{ - Exec: &corev1.ExecAction{ - Command: []string{ - "/bin/sh", - "-c", - fmt.Sprintf("psql \"$(cat %s/%s)\" -c \"SELECT 1\"", MountPathExec, SecretKey), - }, - }, - }, - InitialDelaySeconds: 30, - PeriodSeconds: 15, - TimeoutSeconds: 5, - }, + LivenessProbe: livenessProbe, + // Test connection to pgbouncer + ReadinessProbe: readinessProbe, VolumeMounts: []corev1.VolumeMount{ { Name: VolumeNameExec, diff --git a/internal/webhook/dsnexec_test.go b/internal/webhook/dsnexec_test.go index c98c2eb0..c05a5efd 100644 --- a/internal/webhook/dsnexec_test.go +++ b/internal/webhook/dsnexec_test.go @@ -126,6 +126,8 @@ var _ = Describe("dsnexec defaulting", func() { Expect(sidecar.VolumeMounts[0].Name).To(Equal(VolumeNameExec)) Expect(sidecar.VolumeMounts[0].MountPath).To(Equal(MountPathExec)) Expect(sidecar.VolumeMounts[0].ReadOnly).To(BeTrue()) + Expect(sidecar.ReadinessProbe).ToNot(BeNil()) + Expect(sidecar.LivenessProbe).ToNot(BeNil()) }) It("pre-mutated pods are not re-mutated", func() { @@ -145,7 +147,7 @@ var _ = Describe("dsnexec defaulting", func() { func makeMutatedPodExec(name, claimName, secretName string) *corev1.Pod { pod := makePodExec(name, claimName) - Expect(mutatePodExec(context.TODO(), pod, secretName, sidecarImageExec)).To(Succeed()) + Expect(mutatePodExec(context.TODO(), pod, secretName, sidecarImageExec, true, true)).To(Succeed()) return pod } diff --git a/internal/webhook/suite_test.go b/internal/webhook/suite_test.go index 5a66974a..4921daba 100644 --- a/internal/webhook/suite_test.go +++ b/internal/webhook/suite_test.go @@ -106,10 +106,12 @@ var _ = BeforeSuite(func() { Expect(err).NotTo(HaveOccurred()) err = SetupWebhookWithManager(mgr, SetupConfig{ - DBProxyImg: sidecarImageProxy, - DSNExecImg: sidecarImageExec, - Namespace: "default", - Class: "default", + DBProxyImg: sidecarImageProxy, + DSNExecImg: sidecarImageExec, + Namespace: "default", + Class: "default", + EnableReady: true, + EnableLiveness: true, }) Expect(err).NotTo(HaveOccurred())