
Commit ce9e3fe

towcanojnhuh authored and committed
DRA: refactor utils related to NodeInfos
simulator.BuildNodeInfoForNode, core_utils.GetNodeInfoFromTemplate, and scheduler_utils.DeepCopyTemplateNode all had very similar logic for sanitizing and copying NodeInfos. They are all consolidated into one file in simulator, sharing common logic. MixedTemplateNodeInfoProvider now correctly uses ClusterSnapshot to correlate Nodes to scheduled pods, instead of a live Pod lister. As a result, the snapshot now has to be properly initialized in a number of tests.
1 parent 17ef915 commit ce9e3fe
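For illustration, a minimal sketch of how the scale-up call sites below use the consolidated helper. The signature and types are inferred from the call sites in this diff, not taken from the simulator package itself, so treat the wrapper function and its name as hypothetical:

	// Hypothetical usage sketch; the signature of
	// simulator.TemplateNodeInfoFromNodeGroupTemplate is inferred from
	// the call sites in this commit.
	package example

	import (
		appsv1 "k8s.io/api/apps/v1"

		"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
		"k8s.io/autoscaler/cluster-autoscaler/simulator"
		"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
		"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
		"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
	)

	// templateNodeInfos builds one sanitized NodeInfo per node group, the
	// way the scale-up orchestrator does after this refactor. It stands in
	// for the old core_utils.GetNodeInfoFromTemplate at every call site.
	func templateNodeInfos(groups []cloudprovider.NodeGroup, daemonSets []*appsv1.DaemonSet, taintConfig taints.TaintConfig) (map[string]*framework.NodeInfo, errors.AutoscalerError) {
		nodeInfos := map[string]*framework.NodeInfo{}
		for _, group := range groups {
			nodeInfo, aErr := simulator.TemplateNodeInfoFromNodeGroupTemplate(group, daemonSets, taintConfig)
			if aErr != nil {
				return nil, aErr
			}
			nodeInfos[group.Id()] = nodeInfo
		}
		return nodeInfos, nil
	}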

File tree

15 files changed, +330 -310 lines


cluster-autoscaler/core/scaleup/orchestrator/async_initializer.go

Lines changed: 2 additions & 2 deletions

@@ -25,10 +25,10 @@ import (
 
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
 	"k8s.io/autoscaler/cluster-autoscaler/context"
-	"k8s.io/autoscaler/cluster-autoscaler/core/utils"
 	"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroups"
 	"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset"
 	"k8s.io/autoscaler/cluster-autoscaler/processors/status"
+	"k8s.io/autoscaler/cluster-autoscaler/simulator"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
@@ -110,7 +110,7 @@ func (s *AsyncNodeGroupInitializer) InitializeNodeGroup(result nodegroups.AsyncN
 	mainCreatedNodeGroup := result.CreationResult.MainCreatedNodeGroup
 	// If possible replace candidate node-info with node info based on crated node group. The latter
 	// one should be more in line with nodes which will be created by node group.
-	nodeInfo, aErr := utils.GetNodeInfoFromTemplate(mainCreatedNodeGroup, s.daemonSets, s.taintConfig)
+	nodeInfo, aErr := simulator.TemplateNodeInfoFromNodeGroupTemplate(mainCreatedNodeGroup, s.daemonSets, s.taintConfig)
 	if aErr != nil {
 		klog.Warningf("Cannot build node info for newly created main node group %s. Using fallback. Error: %v", mainCreatedNodeGroup.Id(), aErr)
 		nodeInfo = s.nodeInfo

cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go

Lines changed: 3 additions & 3 deletions

@@ -27,14 +27,14 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/context"
 	"k8s.io/autoscaler/cluster-autoscaler/core/scaleup/equivalence"
 	"k8s.io/autoscaler/cluster-autoscaler/core/scaleup/resource"
-	"k8s.io/autoscaler/cluster-autoscaler/core/utils"
 	"k8s.io/autoscaler/cluster-autoscaler/estimator"
 	"k8s.io/autoscaler/cluster-autoscaler/expander"
 	"k8s.io/autoscaler/cluster-autoscaler/metrics"
 	ca_processors "k8s.io/autoscaler/cluster-autoscaler/processors"
 	"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroups"
 	"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset"
 	"k8s.io/autoscaler/cluster-autoscaler/processors/status"
+	"k8s.io/autoscaler/cluster-autoscaler/simulator"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/klogx"
@@ -527,7 +527,7 @@ func (o *ScaleUpOrchestrator) CreateNodeGroup(
 
 	// If possible replace candidate node-info with node info based on crated node group. The latter
 	// one should be more in line with nodes which will be created by node group.
-	mainCreatedNodeInfo, aErr := utils.GetNodeInfoFromTemplate(createNodeGroupResult.MainCreatedNodeGroup, daemonSets, o.taintConfig)
+	mainCreatedNodeInfo, aErr := simulator.TemplateNodeInfoFromNodeGroupTemplate(createNodeGroupResult.MainCreatedNodeGroup, daemonSets, o.taintConfig)
 	if aErr == nil {
 		nodeInfos[createNodeGroupResult.MainCreatedNodeGroup.Id()] = mainCreatedNodeInfo
 		schedulablePodGroups[createNodeGroupResult.MainCreatedNodeGroup.Id()] = o.SchedulablePodGroups(podEquivalenceGroups, createNodeGroupResult.MainCreatedNodeGroup, mainCreatedNodeInfo)
@@ -542,7 +542,7 @@ func (o *ScaleUpOrchestrator) CreateNodeGroup(
 		delete(schedulablePodGroups, oldId)
 	}
 	for _, nodeGroup := range createNodeGroupResult.ExtraCreatedNodeGroups {
-		nodeInfo, aErr := utils.GetNodeInfoFromTemplate(nodeGroup, daemonSets, o.taintConfig)
+		nodeInfo, aErr := simulator.TemplateNodeInfoFromNodeGroupTemplate(nodeGroup, daemonSets, o.taintConfig)
 		if aErr != nil {
 			klog.Warningf("Cannot build node info for newly created extra node group %v; balancing similar node groups will not work; err=%v", nodeGroup.Id(), aErr)
 			continue

cluster-autoscaler/core/scaleup/orchestrator/orchestrator_test.go

Lines changed: 20 additions & 9 deletions

@@ -1048,6 +1048,8 @@ func runSimpleScaleUpTest(t *testing.T, config *ScaleUpTestConfig) *ScaleUpTestR
 	// build orchestrator
 	context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, listers, provider, nil, nil)
 	assert.NoError(t, err)
+	err = context.ClusterSnapshot.Initialize(nodes, kube_util.ScheduledPods(pods))
+	assert.NoError(t, err)
 	nodeInfos, err := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).
 		Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now)
 	assert.NoError(t, err)
@@ -1129,13 +1131,15 @@ func TestScaleUpUnhealthy(t *testing.T) {
 	SetNodeReadyState(n1, true, someTimeAgo)
 	n2 := BuildTestNode("n2", 1000, 1000)
 	SetNodeReadyState(n2, true, someTimeAgo)
+	nodes := []*apiv1.Node{n1, n2}
 
 	p1 := BuildTestPod("p1", 80, 0)
 	p2 := BuildTestPod("p2", 800, 0)
 	p1.Spec.NodeName = "n1"
 	p2.Spec.NodeName = "n2"
+	pods := []*apiv1.Pod{p1, p2}
 
-	podLister := kube_util.NewTestPodLister([]*apiv1.Pod{p1, p2})
+	podLister := kube_util.NewTestPodLister(pods)
 	listers := kube_util.NewListerRegistry(nil, nil, podLister, nil, nil, nil, nil, nil, nil)
 
 	provider := testprovider.NewTestCloudProvider(func(nodeGroup string, increase int) error {
@@ -1154,8 +1158,8 @@ func TestScaleUpUnhealthy(t *testing.T) {
 	}
 	context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, listers, provider, nil, nil)
 	assert.NoError(t, err)
-
-	nodes := []*apiv1.Node{n1, n2}
+	err = context.ClusterSnapshot.Initialize(nodes, pods)
+	assert.NoError(t, err)
 	nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now)
 	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
 	clusterState.UpdateNodes(nodes, nodeInfos, time.Now())
@@ -1197,7 +1201,8 @@ func TestBinpackingLimiter(t *testing.T) {
 
 	context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, listers, provider, nil, nil)
 	assert.NoError(t, err)
-
+	err = context.ClusterSnapshot.Initialize(nodes, nil)
+	assert.NoError(t, err)
 	nodeInfos, err := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).
 		Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now)
 	assert.NoError(t, err)
@@ -1232,11 +1237,13 @@ func TestScaleUpNoHelp(t *testing.T) {
 	n1 := BuildTestNode("n1", 100, 1000)
 	now := time.Now()
 	SetNodeReadyState(n1, true, now.Add(-2*time.Minute))
+	nodes := []*apiv1.Node{n1}
 
 	p1 := BuildTestPod("p1", 80, 0)
 	p1.Spec.NodeName = "n1"
+	pods := []*apiv1.Pod{p1}
 
-	podLister := kube_util.NewTestPodLister([]*apiv1.Pod{p1})
+	podLister := kube_util.NewTestPodLister(pods)
 	listers := kube_util.NewListerRegistry(nil, nil, podLister, nil, nil, nil, nil, nil, nil)
 
 	provider := testprovider.NewTestCloudProvider(func(nodeGroup string, increase int) error {
@@ -1254,8 +1261,8 @@ func TestScaleUpNoHelp(t *testing.T) {
 	}
 	context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, listers, provider, nil, nil)
 	assert.NoError(t, err)
-
-	nodes := []*apiv1.Node{n1}
+	err = context.ClusterSnapshot.Initialize(nodes, pods)
+	assert.NoError(t, err)
 	nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now)
 	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
 	clusterState.UpdateNodes(nodes, nodeInfos, time.Now())
@@ -1409,7 +1416,8 @@ func TestComputeSimilarNodeGroups(t *testing.T) {
 	listers := kube_util.NewListerRegistry(nil, nil, kube_util.NewTestPodLister(nil), nil, nil, nil, nil, nil, nil)
 	ctx, err := NewScaleTestAutoscalingContext(config.AutoscalingOptions{BalanceSimilarNodeGroups: tc.balancingEnabled}, &fake.Clientset{}, listers, provider, nil, nil)
 	assert.NoError(t, err)
-
+	err = ctx.ClusterSnapshot.Initialize(nodes, nil)
+	assert.NoError(t, err)
 	nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&ctx, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now)
 	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, ctx.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
 	assert.NoError(t, clusterState.UpdateNodes(nodes, nodeInfos, time.Now()))
@@ -1473,7 +1481,8 @@ func TestScaleUpBalanceGroups(t *testing.T) {
 	}
 	context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, listers, provider, nil, nil)
 	assert.NoError(t, err)
-
+	err = context.ClusterSnapshot.Initialize(nodes, podList)
+	assert.NoError(t, err)
 	nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now)
 	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
 	clusterState.UpdateNodes(nodes, nodeInfos, time.Now())
@@ -1649,6 +1658,8 @@ func TestScaleUpToMeetNodeGroupMinSize(t *testing.T) {
 	assert.NoError(t, err)
 
 	nodes := []*apiv1.Node{n1, n2}
+	err = context.ClusterSnapshot.Initialize(nodes, nil)
+	assert.NoError(t, err)
 	nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, time.Now())
 	processors := NewTestProcessors(&context)
 	clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())

cluster-autoscaler/core/scaleup/resource/manager_test.go

Lines changed: 8 additions & 0 deletions

@@ -72,6 +72,8 @@ func TestDeltaForNode(t *testing.T) {
 
 	ng := testCase.nodeGroupConfig
 	group, nodes := newNodeGroup(t, cp, ng.Name, ng.Min, ng.Max, ng.Size, ng.CPU, ng.Mem)
+	err := ctx.ClusterSnapshot.Initialize(nodes, nil)
+	assert.NoError(t, err)
 	nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&ctx, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, time.Now())
 
 	rm := NewManager(processors.CustomResourcesProcessor)
@@ -113,6 +115,8 @@ func TestResourcesLeft(t *testing.T) {
 
 	ng := testCase.nodeGroupConfig
 	_, nodes := newNodeGroup(t, cp, ng.Name, ng.Min, ng.Max, ng.Size, ng.CPU, ng.Mem)
+	err := ctx.ClusterSnapshot.Initialize(nodes, nil)
+	assert.NoError(t, err)
 	nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&ctx, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, time.Now())
 
 	rm := NewManager(processors.CustomResourcesProcessor)
@@ -164,6 +168,8 @@ func TestApplyLimits(t *testing.T) {
 
 	ng := testCase.nodeGroupConfig
 	group, nodes := newNodeGroup(t, cp, ng.Name, ng.Min, ng.Max, ng.Size, ng.CPU, ng.Mem)
+	err := ctx.ClusterSnapshot.Initialize(nodes, nil)
+	assert.NoError(t, err)
 	nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&ctx, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, time.Now())
 
 	rm := NewManager(processors.CustomResourcesProcessor)
@@ -229,6 +235,8 @@ func TestResourceManagerWithGpuResource(t *testing.T) {
 	assert.NoError(t, err)
 
 	nodes := []*corev1.Node{n1}
+	err = context.ClusterSnapshot.Initialize(nodes, nil)
+	assert.NoError(t, err)
 	nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, time.Now())
 
 	rm := NewManager(processors.CustomResourcesProcessor)
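The recurring edit in the two test files above follows directly from the behavior change in the commit message: MixedTemplateNodeInfoProvider now correlates Nodes to their scheduled pods through the ClusterSnapshot rather than a live Pod lister, so every test that runs the provider has to seed the snapshot first. A minimal sketch of the resulting setup order, assembled from the diff lines above (snapshot first, then the provider):

	// Seed the snapshot with nodes and their scheduled pods; the provider
	// now reads pod placement from here instead of from a pod lister.
	// ScheduledPods appears to filter the list down to pods bound to a node.
	err = context.ClusterSnapshot.Initialize(nodes, kube_util.ScheduledPods(pods))
	assert.NoError(t, err)
	// Only now can the template NodeInfo provider correlate pods to nodes.
	nodeInfos, err := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).
		Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now)
	assert.NoError(t, err)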

cluster-autoscaler/core/static_autoscaler.go

Lines changed: 1 addition & 2 deletions

@@ -52,7 +52,6 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/utils/backoff"
 	caerrors "k8s.io/autoscaler/cluster-autoscaler/utils/errors"
 	kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
-	scheduler_utils "k8s.io/autoscaler/cluster-autoscaler/utils/scheduler"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
 	"k8s.io/utils/integer"
 
@@ -1016,7 +1015,7 @@ func getUpcomingNodeInfos(upcomingCounts map[string]int, nodeInfos map[string]*f
 			// Ensure new nodes have different names because nodeName
 			// will be used as a map key. Also deep copy pods (daemonsets &
 			// any pods added by cloud provider on template).
-			nodes = append(nodes, scheduler_utils.DeepCopyTemplateNode(nodeTemplate, fmt.Sprintf("upcoming-%d", i)))
+			nodes = append(nodes, simulator.FreshNodeInfoFromTemplateNodeInfo(nodeTemplate, fmt.Sprintf("upcoming-%d", i)))
		}
		upcomingNodes[nodeGroup] = nodes
	}
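For the upcoming-nodes path, the renamed helper keeps the old DeepCopyTemplateNode contract: copy the template NodeInfo, deep-copy its pods, and give each copy a unique node name, since node names are used as map keys. A sketch of the loop, with the helper's signature inferred from its single call site in this commit and the wrapper function purely hypothetical:

	// upcomingNodeInfos stamps out count uniquely named copies of a
	// template NodeInfo, as getUpcomingNodeInfos does above.
	func upcomingNodeInfos(nodeTemplate *framework.NodeInfo, count int) []*framework.NodeInfo {
		var nodes []*framework.NodeInfo
		for i := 0; i < count; i++ {
			// Unique names matter because callers key maps by node name.
			nodes = append(nodes, simulator.FreshNodeInfoFromTemplateNodeInfo(nodeTemplate, fmt.Sprintf("upcoming-%d", i)))
		}
		return nodes
	}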
