Skip to content

Commit 8c508c5

Browse files
authored
Merge pull request kubernetes#125527 from sanposhiho/gated-pods-filter-out-bug
fix: skip isPodWorthRequeuing only when SchedulingGates gates the pod
2 parents da479a8 + 98a3182 commit 8c508c5

File tree

3 files changed

+35
-13
lines changed

3 files changed

+35
-13
lines changed

pkg/scheduler/internal/queue/scheduling_queue.go

+18-3
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ import (
4747
"k8s.io/kubernetes/pkg/features"
4848
"k8s.io/kubernetes/pkg/scheduler/framework"
4949
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity"
50+
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
5051
"k8s.io/kubernetes/pkg/scheduler/internal/heap"
5152
"k8s.io/kubernetes/pkg/scheduler/metrics"
5253
"k8s.io/kubernetes/pkg/scheduler/util"
@@ -1193,11 +1194,25 @@ func (p *PriorityQueue) movePodsToActiveOrBackoffQueue(logger klog.Logger, podIn
11931194

11941195
activated := false
11951196
for _, pInfo := range podInfoList {
1196-
// Since there may be many gated pods and they will not move from the
1197-
// unschedulable pool, we skip calling the expensive isPodWorthRequeueing.
1198-
if pInfo.Gated {
1197+
// When handling events takes time, a scheduling throughput gets impacted negatively
1198+
// because of a shared lock within PriorityQueue, which Pop() also requires.
1199+
//
1200+
// Scheduling-gated Pods never get schedulable with any events,
1201+
// except the Pods themselves got updated, which isn't handled by movePodsToActiveOrBackoffQueue.
1202+
// So, we can skip them early here so that they don't go through isPodWorthRequeuing,
1203+
// which isn't fast enough to keep a sufficient scheduling throughput
1204+
// when the number of scheduling-gated Pods in unschedulablePods is large.
1205+
// https://github.com/kubernetes/kubernetes/issues/124384
1206+
// This is a hotfix for this issue, which might be changed
1207+
// once we have a better general solution for the shared lock issue.
1208+
//
1209+
// Note that we cannot skip all pInfo.Gated Pods here
1210+
// because PreEnqueue plugins apart from the scheduling gate plugin may change the gating status
1211+
// with these events.
1212+
if pInfo.Gated && pInfo.UnschedulablePlugins.Has(names.SchedulingGates) {
11991213
continue
12001214
}
1215+
12011216
schedulingHint := p.isPodWorthRequeuing(logger, pInfo, event, oldObj, newObj)
12021217
if schedulingHint == queueSkip {
12031218
// QueueingHintFn determined that this Pod isn't worth putting to activeQ or backoffQ by this event.

pkg/scheduler/internal/queue/scheduling_queue_test.go

+10-4
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ import (
4444
"k8s.io/kubernetes/pkg/features"
4545
"k8s.io/kubernetes/pkg/scheduler/framework"
4646
plfeature "k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
47+
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
4748
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/queuesort"
4849
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/schedulinggates"
4950
"k8s.io/kubernetes/pkg/scheduler/metrics"
@@ -1499,13 +1500,19 @@ func TestPriorityQueue_MoveAllToActiveOrBackoffQueueWithQueueingHint(t *testing.
14991500
expectedQ: unschedulablePods,
15001501
},
15011502
{
1502-
name: "QueueHintFunction is not called when Pod is gated",
1503-
podInfo: setQueuedPodInfoGated(&framework.QueuedPodInfo{PodInfo: mustNewPodInfo(p), UnschedulablePlugins: sets.New("foo")}),
1503+
name: "QueueHintFunction is not called when Pod is gated by SchedulingGates plugin",
1504+
podInfo: setQueuedPodInfoGated(&framework.QueuedPodInfo{PodInfo: mustNewPodInfo(p), UnschedulablePlugins: sets.New(names.SchedulingGates, "foo")}),
15041505
hint: func(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
15051506
return framework.Queue, fmt.Errorf("QueueingHintFn should not be called as pod is gated")
15061507
},
15071508
expectedQ: unschedulablePods,
15081509
},
1510+
{
1511+
name: "QueueHintFunction is called when Pod is gated by a plugin other than SchedulingGates",
1512+
podInfo: setQueuedPodInfoGated(&framework.QueuedPodInfo{PodInfo: mustNewPodInfo(p), UnschedulablePlugins: sets.New("foo")}),
1513+
hint: queueHintReturnQueue,
1514+
expectedQ: activeQ,
1515+
},
15091516
}
15101517

15111518
for _, test := range tests {
@@ -1518,14 +1525,13 @@ func TestPriorityQueue_MoveAllToActiveOrBackoffQueueWithQueueingHint(t *testing.
15181525
QueueingHintFn: test.hint,
15191526
},
15201527
}
1521-
test.podInfo.UnschedulablePlugins = sets.New("foo")
15221528
cl := testingclock.NewFakeClock(now)
15231529
q := NewTestQueue(ctx, newDefaultQueueSort(), WithQueueingHintMapPerProfile(m), WithClock(cl))
1524-
// add to unsched pod pool
15251530
q.activeQ.Add(q.newQueuedPodInfo(test.podInfo.Pod))
15261531
if p, err := q.Pop(logger); err != nil || p.Pod != test.podInfo.Pod {
15271532
t.Errorf("Expected: %v after Pop, but got: %v", test.podInfo.Pod.Name, p.Pod.Name)
15281533
}
1534+
// add to unsched pod pool
15291535
err := q.AddUnschedulableIfNotPresent(logger, test.podInfo, q.SchedulingCycle())
15301536
if err != nil {
15311537
t.Fatalf("unexpected error from AddUnschedulableIfNotPresent: %v", err)

test/integration/scheduler/plugins/plugins_test.go

+7-6
Original file line numberDiff line numberDiff line change
@@ -2651,8 +2651,8 @@ func (pl *SchedulingGatesPluginWOEvents) EventsToRegister() []framework.ClusterE
26512651
return nil
26522652
}
26532653

2654-
// This test helps to verify registering nil events for schedulingGates plugin works as expected.
2655-
func TestSchedulingGatesPluginEventsToRegister(t *testing.T) {
2654+
// This test helps to verify registering nil events for PreEnqueue plugin works as expected.
2655+
func TestPreEnqueuePluginEventsToRegister(t *testing.T) {
26562656
testContext := testutils.InitTestAPIServer(t, "preenqueue-plugin", nil)
26572657

26582658
num := func(pl framework.Plugin) int {
@@ -2668,8 +2668,9 @@ func TestSchedulingGatesPluginEventsToRegister(t *testing.T) {
26682668
}
26692669

26702670
tests := []struct {
2671-
name string
2672-
withEvents bool
2671+
name string
2672+
withEvents bool
2673+
// count is the expected number of calls to PreEnqueue().
26732674
count int
26742675
queueHintEnabled []bool
26752676
expectedScheduled []bool
@@ -2686,7 +2687,7 @@ func TestSchedulingGatesPluginEventsToRegister(t *testing.T) {
26862687
{
26872688
name: "preEnqueue plugin with event registered",
26882689
withEvents: true,
2689-
count: 2,
2690+
count: 3,
26902691
queueHintEnabled: []bool{false, true},
26912692
expectedScheduled: []bool{true, true},
26922693
},
@@ -2700,7 +2701,7 @@ func TestSchedulingGatesPluginEventsToRegister(t *testing.T) {
27002701
t.Run(tt.name+fmt.Sprintf(" queueHint(%v)", queueHintEnabled), func(t *testing.T) {
27012702
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerQueueingHints, queueHintEnabled)
27022703

2703-
// new plugin every time to clear counts
2704+
// use new plugin every time to clear counts
27042705
var plugin framework.PreEnqueuePlugin
27052706
if tt.withEvents {
27062707
plugin = &SchedulingGatesPluginWithEvents{SchedulingGates: schedulinggates.SchedulingGates{}}

0 commit comments

Comments
 (0)