Skip to content

Commit d6c0d7a

Browse files
committed
Add unit test for determineBestPEGToFastpath and some fixes
1 parent 550141b commit d6c0d7a

File tree

2 files changed

+74
-19
lines changed

2 files changed

+74
-19
lines changed

cluster-autoscaler/estimator/binpacking_estimator.go

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -109,13 +109,15 @@ func (e *BinpackingNodeEstimator) Estimate(
109109

110110
podsEquivalenceGroups = e.podOrderer.Order(podsEquivalenceGroups, nodeTemplate, nodeGroup)
111111

112+
useFastpathOnLastPEG := false
112113
if e.fastpathBinpackingEnabled {
113114
bestFastpathPEGindex := determineBestPEGToFastpath(podsEquivalenceGroups, nodeTemplate)
114115
if bestFastpathPEGindex != -1 {
115116
bestFastpathPEG := podsEquivalenceGroups[bestFastpathPEGindex]
116117
pegsWithoutBestForFastpath := append(podsEquivalenceGroups[:bestFastpathPEGindex], podsEquivalenceGroups[bestFastpathPEGindex+1:]...)
117118
// We'll put at the end the PEG which will benefit the most from fastpath binpacking, since it runs only on the last PEG
118119
podsEquivalenceGroups = append(pegsWithoutBestForFastpath, bestFastpathPEG)
120+
useFastpathOnLastPEG = true
119121
}
120122
}
121123

@@ -137,8 +139,8 @@ func (e *BinpackingNodeEstimator) Estimate(
137139
}
138140

139141
if newNodesAvailable {
140-
// Since fastpath binpacking adds just one node to the snapshot, it will cause unaccurate simulations on subsequent loops, therefore we only use it on the last group
141-
if e.fastpathBinpackingEnabled && i == len(podsEquivalenceGroups)-1 && shouldUseFastPath(remainingPods) {
142+
// Since fastpath binpacking adds just one node to the snapshot, it will cause inaccurate simulations on subsequent loops, therefore we only use it on the last group
143+
if i == len(podsEquivalenceGroups)-1 && useFastpathOnLastPEG {
142144
newNodesAvailable, err = e.tryFastPath(estimationState, nodeTemplate, remainingPods)
143145
} else {
144146
newNodesAvailable, err = e.tryToScheduleOnNewNodes(estimationState, nodeTemplate, remainingPods)
@@ -264,6 +266,9 @@ func (e *BinpackingNodeEstimator) tryToScheduleOnNewNodes(
264266
return true, nil
265267
}
266268

269+
// An optimized version of tryToScheduleOnNewNodes
270+
// It attempts to pack as much pods as possible on a single node, and then estimates the total
271+
// amount of nodes by simple arithmetic: EstimatedNodes = ceil(TotalPods / PodsFittingOnASingleNode)
267272
func (e *BinpackingNodeEstimator) tryFastPath(
268273
estimationState *estimationState,
269274
nodeTemplate *framework.NodeInfo,
@@ -389,18 +394,6 @@ func observeBinpackingHeterogeneity(podsEquivalenceGroups []PodEquivalenceGroup,
389394
metrics.ObserveBinpackingHeterogeneity(instanceType, cpuCount, nsCountBucket, len(podsEquivalenceGroups))
390395
}
391396

392-
func hasNonHostnameTopologyConstraint(pod *apiv1.Pod) bool {
393-
if pod.Spec.TopologySpreadConstraints == nil {
394-
return false
395-
}
396-
for _, constraint := range pod.Spec.TopologySpreadConstraints {
397-
if constraint.TopologyKey != apiv1.LabelHostname {
398-
return true
399-
}
400-
}
401-
return false
402-
}
403-
404397
func hasNonHostnamePodAntiAffinity(pod *apiv1.Pod) bool {
405398
if pod.Spec.Affinity == nil || pod.Spec.Affinity.PodAntiAffinity == nil || pod.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil {
406399
return false
@@ -418,15 +411,19 @@ func shouldUseFastPath(pods []*apiv1.Pod) bool {
418411
return false
419412
}
420413

421-
if hasNonHostnameTopologyConstraint(pods[0]) || hasNonHostnamePodAntiAffinity(pods[0]) {
414+
// Fastpath assumes that once a pod failed to schedule on a node, it will never be possible to schedule any more pods on this node.
415+
// This assumption is not correct in cases of topology spread constraints.
416+
// Fastpath also assumes that if it was able to schedule pods on a new node, it will be able to then schedule an equal amount of pods on an additional identical new node,
417+
// which isn't correct in cases of zonal/regional pod anti affinity.
418+
if pods[0].Spec.TopologySpreadConstraints != nil || hasNonHostnamePodAntiAffinity(pods[0]) {
422419
return false
423420
}
424421

425422
return true
426423
}
427424

428425
// determineBestPEGToFastpath returns the index of the pod equivalence group that would benefit the most from fastpath binpacking (= largest expected number of (nodes * pods))
429-
// returns -1 if it wasn't possible to determine such group
426+
// returns -1 if none of the groups support fastpath binpacking
430427
func determineBestPEGToFastpath(podsEquivalenceGroups []PodEquivalenceGroup, nodeTemplate *framework.NodeInfo) int {
431428
maxNodesTimesPods := 0
432429
bestPEGIndex := -1
@@ -445,8 +442,8 @@ func determineBestPEGToFastpath(podsEquivalenceGroups []PodEquivalenceGroup, nod
445442
}
446443
}
447444
}
448-
if peg.Exemplar().Spec.Resources != nil && peg.Exemplar().Spec.Resources.Requests != nil {
449-
resourcesRequests := peg.Exemplar().Spec.Resources.Requests
445+
if len(peg.Exemplar().Spec.Containers) > 0 && peg.Exemplar().Spec.Containers[0].Resources.Requests != nil {
446+
resourcesRequests := peg.Exemplar().Spec.Containers[0].Resources.Requests
450447
if resourcesRequests.Cpu() != nil {
451448
numNodesByCpu = int(math.Ceil(float64(len(peg.Pods)) * resourcesRequests.Cpu().AsApproximateFloat64() / nodeTemplate.Node().Status.Capacity.Cpu().AsApproximateFloat64()))
452449
}
@@ -455,7 +452,8 @@ func determineBestPEGToFastpath(podsEquivalenceGroups []PodEquivalenceGroup, nod
455452
}
456453
}
457454
nodesTimesPods := max(numNodesByAntiAffinity, numNodesByCpu, numNodesByMemory) * len(peg.Pods)
458-
if nodesTimesPods > maxNodesTimesPods && shouldUseFastPath(peg.Pods) {
455+
// In case the (nodes*pods) score is equal, we still want to take the latest pod group to remain as close to the original pod ordering as possible
456+
if nodesTimesPods >= maxNodesTimesPods && shouldUseFastPath(peg.Pods) {
459457
bestPEGIndex = i
460458
maxNodesTimesPods = nodesTimesPods
461459
}

cluster-autoscaler/estimator/binpacking_estimator_test.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,3 +312,60 @@ func BenchmarkBinpackingEstimate(b *testing.B) {
312312
assert.Equal(b, expectPodCount, len(estimatedPods))
313313
}
314314
}
315+
316+
func TestDetermineBestPodEquivalenceGroupToFastpath(t *testing.T) {
317+
testCases := []struct {
318+
name string
319+
podsEquivalenceGroups []PodEquivalenceGroup
320+
expectResult int
321+
}{
322+
{
323+
name: "both groups 1 node, take group with more pods",
324+
podsEquivalenceGroups: []PodEquivalenceGroup{
325+
makePodEquivalenceGroup(BuildTestPod("estimatee1", 5, 5), 4), // 1 node * 4 pods = 4
326+
makePodEquivalenceGroup(BuildTestPod("estimatee2", 5, 6), 3), // 1 node * 3 pods = 3
327+
},
328+
expectResult: 0,
329+
},
330+
{
331+
name: "take group with largest pods*nodes, even if it has less pods",
332+
podsEquivalenceGroups: []PodEquivalenceGroup{
333+
makePodEquivalenceGroup(BuildTestPod("estimatee1", 5, 5), 4), // 1 node * 4 pods = 4
334+
makePodEquivalenceGroup(BuildTestPod("estimatee2", 80, 80), 3), // 3 nodes * 3 pods = 9
335+
},
336+
expectResult: 1,
337+
},
338+
{
339+
name: "take group with largest pods*nodes, even if it has less nodes",
340+
podsEquivalenceGroups: []PodEquivalenceGroup{
341+
makePodEquivalenceGroup(BuildTestPod("estimatee1", 5, 5), 10), // 1 node * 10 pods = 10
342+
makePodEquivalenceGroup(BuildTestPod("estimatee2", 80, 80), 3), // 3 nodes * 3 pods = 9
343+
},
344+
expectResult: 0,
345+
},
346+
{
347+
name: "no fastpath supporting pod groups",
348+
podsEquivalenceGroups: []PodEquivalenceGroup{
349+
makePodEquivalenceGroup(BuildTestPod("estimatee1", 5, 5, WithMaxSkew(2, "kubernetes.io/hostname", 1)), 10),
350+
makePodEquivalenceGroup(BuildTestPod("estimatee2", 8, 8, WithMaxSkew(2, "kubernetes.io/hostname", 1)), 3),
351+
},
352+
expectResult: -1,
353+
},
354+
{
355+
name: "no resource requests defaults to last group, still valid fastpath group",
356+
podsEquivalenceGroups: []PodEquivalenceGroup{
357+
makePodEquivalenceGroup(&apiv1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "estimatee1"}, Spec: apiv1.PodSpec{Containers: []apiv1.Container{}}}, 2),
358+
makePodEquivalenceGroup(&apiv1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "estimatee2"}, Spec: apiv1.PodSpec{Containers: []apiv1.Container{}}}, 4),
359+
},
360+
expectResult: 1,
361+
},
362+
}
363+
364+
for _, tc := range testCases {
365+
t.Run(tc.name, func(t *testing.T) {
366+
nodeInfo := framework.NewTestNodeInfo(makeNode(100, 100, 10, "oldnode", "zone-jupiter"))
367+
result := determineBestPEGToFastpath(tc.podsEquivalenceGroups, nodeInfo)
368+
assert.Equal(t, tc.expectResult, result)
369+
})
370+
}
371+
}

0 commit comments

Comments
 (0)