diff --git a/.golangci.yaml b/.golangci.yaml
index 92c702877d..8a6ea92e48 100644
--- a/.golangci.yaml
+++ b/.golangci.yaml
@@ -8,8 +8,8 @@ linters:
   enable:
     - asciicheck
     - bidichk
+    - copyloopvar
     - errorlint
-    - exportloopref
     - gosec
     - revive
     - stylecheck
diff --git a/go.mod b/go.mod
index 9e7bac8f4d..d8c865589b 100644
--- a/go.mod
+++ b/go.mod
@@ -1,6 +1,6 @@
 module sigs.k8s.io/karpenter
 
-go 1.22.5
+go 1.22.12
 
 require (
 	github.com/Pallinder/go-randomdata v1.2.0
diff --git a/kwok/apis/crds/karpenter.kwok.sh_kwoknodeclasses.yaml b/kwok/apis/crds/karpenter.kwok.sh_kwoknodeclasses.yaml
index c93d3e75ca..c00c911b79 100644
--- a/kwok/apis/crds/karpenter.kwok.sh_kwoknodeclasses.yaml
+++ b/kwok/apis/crds/karpenter.kwok.sh_kwoknodeclasses.yaml
@@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.16.5
+    controller-gen.kubebuilder.io/version: v0.17.2
   name: kwoknodeclasses.karpenter.kwok.sh
 spec:
   group: karpenter.kwok.sh
diff --git a/kwok/charts/crds/karpenter.sh_nodeclaims.yaml b/kwok/charts/crds/karpenter.sh_nodeclaims.yaml
index f8ecbec4b7..76bc49d3e0 100644
--- a/kwok/charts/crds/karpenter.sh_nodeclaims.yaml
+++ b/kwok/charts/crds/karpenter.sh_nodeclaims.yaml
@@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.16.5
+    controller-gen.kubebuilder.io/version: v0.17.2
   name: nodeclaims.karpenter.sh
 spec:
   group: karpenter.sh
diff --git a/kwok/charts/crds/karpenter.sh_nodepools.yaml b/kwok/charts/crds/karpenter.sh_nodepools.yaml
index 05bafa01f9..970652bc85 100644
--- a/kwok/charts/crds/karpenter.sh_nodepools.yaml
+++ b/kwok/charts/crds/karpenter.sh_nodepools.yaml
@@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.16.5
+    controller-gen.kubebuilder.io/version: v0.17.2
   name: nodepools.karpenter.sh
 spec:
   group: karpenter.sh
diff --git a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml
index e949ef3632..8edc06c2d9 100644
--- a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml
+++ b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml
@@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.16.5
+    controller-gen.kubebuilder.io/version: v0.17.2
   name: nodeclaims.karpenter.sh
 spec:
   group: karpenter.sh
diff --git a/pkg/apis/crds/karpenter.sh_nodepools.yaml b/pkg/apis/crds/karpenter.sh_nodepools.yaml
index 6bfc0532a3..42a60eae06 100644
--- a/pkg/apis/crds/karpenter.sh_nodepools.yaml
+++ b/pkg/apis/crds/karpenter.sh_nodepools.yaml
@@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.16.5
+    controller-gen.kubebuilder.io/version: v0.17.2
   name: nodepools.karpenter.sh
 spec:
   group: karpenter.sh
diff --git a/pkg/controllers/nodeclaim/lifecycle/controller.go b/pkg/controllers/nodeclaim/lifecycle/controller.go
index 8f3335c8f3..b4870c50a5 100644
--- a/pkg/controllers/nodeclaim/lifecycle/controller.go
+++ b/pkg/controllers/nodeclaim/lifecycle/controller.go
@@ -70,7 +70,7 @@ func NewController(clk clock.Clock, kubeClient client.Client, cloudProvider clou
 		kubeClient: kubeClient,
 
 		launch:         &Launch{kubeClient: kubeClient, cloudProvider: cloudProvider, cache: cache.New(time.Minute, time.Second*10), recorder: recorder},
-		registration:   &Registration{kubeClient: kubeClient},
+		registration:   &Registration{kubeClient: kubeClient, recorder: recorder},
 		initialization: &Initialization{kubeClient: kubeClient},
 		liveness:       &Liveness{clock: clk, kubeClient: kubeClient},
 	}
diff --git a/pkg/controllers/nodeclaim/lifecycle/events.go b/pkg/controllers/nodeclaim/lifecycle/events.go
index 2697b1cdc0..95734a7aba 100644
--- a/pkg/controllers/nodeclaim/lifecycle/events.go
+++ b/pkg/controllers/nodeclaim/lifecycle/events.go
@@ -44,3 +44,13 @@ func NodeClassNotReadyEvent(nodeClaim *v1.NodeClaim, err error) events.Event {
 		DedupeValues: []string{string(nodeClaim.UID)},
 	}
 }
+
+func UnregisteredTaintMissingEvent(nodeClaim *v1.NodeClaim) events.Event {
+	return events.Event{
+		InvolvedObject: nodeClaim,
+		Type:           corev1.EventTypeWarning,
+		Reason:         "UnregisteredTaintMissing",
+		Message:        fmt.Sprintf("Missing %s taint which prevents registration-related race conditions on Karpenter-managed nodes", v1.UnregisteredTaintKey),
+		DedupeValues:   []string{string(nodeClaim.UID)},
+	}
+}
diff --git a/pkg/controllers/nodeclaim/lifecycle/registration.go b/pkg/controllers/nodeclaim/lifecycle/registration.go
index 77e0e22160..3cad425e1d 100644
--- a/pkg/controllers/nodeclaim/lifecycle/registration.go
+++ b/pkg/controllers/nodeclaim/lifecycle/registration.go
@@ -32,6 +32,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 
 	v1 "sigs.k8s.io/karpenter/pkg/apis/v1"
+	"sigs.k8s.io/karpenter/pkg/events"
 	"sigs.k8s.io/karpenter/pkg/metrics"
 	"sigs.k8s.io/karpenter/pkg/scheduling"
 	nodeclaimutil "sigs.k8s.io/karpenter/pkg/utils/nodeclaim"
@@ -39,6 +40,7 @@ import (
 
 type Registration struct {
 	kubeClient client.Client
+	recorder   events.Recorder
 }
 
 func (r *Registration) Reconcile(ctx context.Context, nodeClaim *v1.NodeClaim) (reconcile.Result, error) {
@@ -61,11 +63,11 @@ func (r *Registration) Reconcile(ctx context.Context, nodeClaim *v1.NodeClaim) (
 	_, hasStartupTaint := lo.Find(node.Spec.Taints, func(t corev1.Taint) bool {
 		return t.MatchTaint(&v1.UnregisteredNoExecuteTaint)
 	})
-	// check if sync succeeded but setting the registered status condition failed
-	// if sync succeeded, then the label will be present and the taint will be gone
+	// if the sync hasn't happened yet and the race-protecting startup taint isn't present, log it as missing and proceed
+	// if the sync has already happened, the startup taint has been removed if it was ever present
 	if _, ok := node.Labels[v1.NodeRegisteredLabelKey]; !ok && !hasStartupTaint {
-		nodeClaim.StatusConditions().SetFalse(v1.ConditionTypeRegistered, "UnregisteredTaintNotFound", fmt.Sprintf("Invariant violated, %s taint must be present on Karpenter-managed nodes", v1.UnregisteredTaintKey))
-		return reconcile.Result{}, fmt.Errorf("missing required startup taint, %s", v1.UnregisteredTaintKey)
+		log.FromContext(ctx).Error(fmt.Errorf("missing %s taint which prevents registration-related race conditions on Karpenter-managed nodes", v1.UnregisteredTaintKey), "node claim registration error")
+		r.recorder.Publish(UnregisteredTaintMissingEvent(nodeClaim))
 	}
 	ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("Node", klog.KRef("", node.Name)))
 	if err = r.syncNode(ctx, nodeClaim, node); err != nil {
diff --git a/pkg/controllers/nodeclaim/lifecycle/registration_test.go b/pkg/controllers/nodeclaim/lifecycle/registration_test.go
index f5812e381d..b0ea490d2c 100644
--- a/pkg/controllers/nodeclaim/lifecycle/registration_test.go
+++ b/pkg/controllers/nodeclaim/lifecycle/registration_test.go
@@ -92,7 +92,7 @@ var _ = Describe("Registration", func() {
 		Expect(node.Labels).To(HaveKeyWithValue(v1.NodeRegisteredLabelKey, "true"))
 		Expect(node.Spec.Taints).To(Not(ContainElement(v1.UnregisteredNoExecuteTaint)))
 	})
-	It("should fail registration if the karpenter.sh/unregistered taint is not present on the node and the node isn't labeled as registered", func() {
+	It("should succeed registration and emit an event if the karpenter.sh/unregistered taint is not present", func() {
 		nodeClaim := test.NodeClaim(v1.NodeClaim{
 			ObjectMeta: metav1.ObjectMeta{
 				Labels: map[string]string{
@@ -104,12 +104,23 @@ var _ = Describe("Registration", func() {
 		ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim)
 		nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
 
+		// Create a node without the unregistered taint
 		node := test.Node(test.NodeOptions{ProviderID: nodeClaim.Status.ProviderID})
 		ExpectApplied(ctx, env.Client, node)
-		_ = ExpectObjectReconcileFailed(ctx, env.Client, nodeClaimController, nodeClaim)
+		ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim)
+
+		// Verify the NodeClaim is registered
 		nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
-		Expect(ExpectStatusConditionExists(nodeClaim, v1.ConditionTypeRegistered).Status).To(Equal(metav1.ConditionFalse))
+		Expect(nodeClaim.StatusConditions().Get(v1.ConditionTypeRegistered).IsTrue()).To(BeTrue())
+		Expect(nodeClaim.Status.NodeName).To(Equal(node.Name))
+
+		// Verify the node is registered
+		node = ExpectExists(ctx, env.Client, node)
+		Expect(node.Labels).To(HaveKeyWithValue(v1.NodeRegisteredLabelKey, "true"))
+
+		Expect(recorder.Calls("UnregisteredTaintMissing")).To(Equal(1))
 	})
+
 	It("should sync the labels to the Node when the Node comes online", func() {
 		nodeClaim := test.NodeClaim(v1.NodeClaim{
 			ObjectMeta: metav1.ObjectMeta{
diff --git a/pkg/controllers/nodeclaim/lifecycle/suite_test.go b/pkg/controllers/nodeclaim/lifecycle/suite_test.go
index a4da3ad8db..2c0ef78f8d 100644
--- a/pkg/controllers/nodeclaim/lifecycle/suite_test.go
+++ b/pkg/controllers/nodeclaim/lifecycle/suite_test.go
@@ -28,7 +28,6 @@ import (
 	"github.com/samber/lo"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/client-go/tools/record"
 	clock "k8s.io/utils/clock/testing"
 	"sigs.k8s.io/controller-runtime/pkg/cache"
 	"sigs.k8s.io/controller-runtime/pkg/client"
@@ -39,7 +38,6 @@ import (
 
 	v1 "sigs.k8s.io/karpenter/pkg/apis/v1"
 	"sigs.k8s.io/karpenter/pkg/cloudprovider/fake"
 	nodeclaimlifecycle "sigs.k8s.io/karpenter/pkg/controllers/nodeclaim/lifecycle"
-	"sigs.k8s.io/karpenter/pkg/events"
 	"sigs.k8s.io/karpenter/pkg/operator/options"
 	. "sigs.k8s.io/karpenter/pkg/test/expectations"
@@ -51,6 +49,7 @@ var nodeClaimController *nodeclaimlifecycle.Controller
 var env *test.Environment
 var fakeClock *clock.FakeClock
 var cloudProvider *fake.CloudProvider
+var recorder *test.EventRecorder
 
 func TestAPIs(t *testing.T) {
 	ctx = TestContextWithLogger(t)
@@ -60,6 +59,7 @@ func TestAPIs(t *testing.T) {
 
 var _ = BeforeSuite(func() {
 	fakeClock = clock.NewFakeClock(time.Now())
+	recorder = test.NewEventRecorder()
 	env = test.NewEnvironment(test.WithCRDs(apis.CRDs...), test.WithCRDs(v1alpha1.CRDs...), test.WithFieldIndexers(func(c cache.Cache) error {
 		return c.IndexField(ctx, &corev1.Node{}, "spec.providerID", func(obj client.Object) []string {
 			return []string{obj.(*corev1.Node).Spec.ProviderID}
@@ -68,7 +68,7 @@ var _ = BeforeSuite(func() {
 	ctx = options.ToContext(ctx, test.Options())
 
 	cloudProvider = fake.NewCloudProvider()
-	nodeClaimController = nodeclaimlifecycle.NewController(fakeClock, env.Client, cloudProvider, events.NewRecorder(&record.FakeRecorder{}))
+	nodeClaimController = nodeclaimlifecycle.NewController(fakeClock, env.Client, cloudProvider, recorder)
 })
 
 var _ = AfterSuite(func() {
@@ -85,6 +85,7 @@ var _ = Describe("Finalizer", func() {
 	var nodePool *v1.NodePool
 
 	BeforeEach(func() {
+		recorder.Reset() // Reset the events that we captured during the run
 		nodePool = test.NodePool()
 	})
 	It("should add the finalizer if it doesn't exist", func() {
diff --git a/pkg/test/v1alpha1/crds/karpenter.test.sh_testnodeclasses.yaml b/pkg/test/v1alpha1/crds/karpenter.test.sh_testnodeclasses.yaml
index d36c4e448c..f16803a2be 100644
--- a/pkg/test/v1alpha1/crds/karpenter.test.sh_testnodeclasses.yaml
+++ b/pkg/test/v1alpha1/crds/karpenter.test.sh_testnodeclasses.yaml
@@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.16.5
+    controller-gen.kubebuilder.io/version: v0.17.2
   name: testnodeclasses.karpenter.test.sh
 spec:
   group: karpenter.test.sh
diff --git a/test/pkg/environment/common/monitor.go b/test/pkg/environment/common/monitor.go
index d3feb0b327..5c2f5668c9 100644
--- a/test/pkg/environment/common/monitor.go
+++ b/test/pkg/environment/common/monitor.go
@@ -135,7 +135,7 @@ func (m *Monitor) DeletedNodes() []*corev1.Node {
 func (m *Monitor) PendingPods(selector labels.Selector) []*corev1.Pod {
 	var pods []*corev1.Pod
 	for _, pod := range m.poll().pods.Items {
-		pod := pod
+
 		if pod.Status.Phase != corev1.PodPending {
 			continue
 		}
@@ -154,7 +154,7 @@ func (m *Monitor) PendingPodsCount(selector labels.Selector) int {
 func (m *Monitor) RunningPods(selector labels.Selector) []*corev1.Pod {
 	var pods []*corev1.Pod
 	for _, pod := range m.poll().pods.Items {
-		pod := pod
+
 		if pod.Status.Phase != corev1.PodRunning {
 			continue
 		}