@@ -10,6 +10,7 @@ import (
1010 "os"
1111 "path"
1212 "regexp"
13+ "sort"
1314 "strconv"
1415 "strings"
1516 "sync"
@@ -39,6 +40,7 @@ import (
3940 "k8s.io/kubectl/pkg/util/templates"
4041 admissionapi "k8s.io/pod-security-admission/api"
4142 "k8s.io/utils/exec"
43+ utilptr "k8s.io/utils/ptr"
4244
4345 configclient "github.com/openshift/client-go/config/clientset/versioned"
4446 imagev1client "github.com/openshift/client-go/image/clientset/versioned/typed/image/v1"
@@ -51,9 +53,10 @@ import (
 )

 const (
-    gatherContainerName = "gather"
-    unreachableTaintKey = "node.kubernetes.io/unreachable"
-    notReadyTaintKey    = "node.kubernetes.io/not-ready"
+    gatherContainerName       = "gather"
+    unreachableTaintKey       = "node.kubernetes.io/unreachable"
+    notReadyTaintKey          = "node.kubernetes.io/not-ready"
+    controlPlaneNodeRoleLabel = "node-role.kubernetes.io/control-plane"
 )

 var (
@@ -101,23 +104,9 @@ sleep 5
 done`
 )

-var (
-    tolerationNotReady = corev1.Toleration{
-        Key:      "node.kubernetes.io/not-ready",
-        Operator: corev1.TolerationOpExists,
-        Effect:   corev1.TaintEffectNoSchedule,
-    }
-
-    tolerationMasterNoSchedule = corev1.Toleration{
-        Key:      "node-role.kubernetes.io/master",
-        Operator: corev1.TolerationOpExists,
-        Effect:   corev1.TaintEffectNoSchedule,
-    }
-)
-
-// buildNodeSelector builds a node selector from the provided node names.
-func buildNodeAffinity(nodeNames []string) *corev1.Affinity {
-    if len(nodeNames) == 0 {
+// buildNodeAffinity builds a node affinity from the provided node hostnames.
+func buildNodeAffinity(nodeHostnames []string) *corev1.Affinity {
+    if len(nodeHostnames) == 0 {
         return nil
     }
     return &corev1.Affinity{
@@ -129,7 +118,7 @@ func buildNodeAffinity(nodeNames []string) *corev1.Affinity {
                     {
                         Key:      "kubernetes.io/hostname",
                         Operator: corev1.NodeSelectorOpIn,
-                        Values:   nodeNames,
+                        Values:   nodeHostnames,
                     },
                 },
             },
@@ -444,40 +433,124 @@ func (o *MustGatherOptions) Validate() error {
     return nil
 }

-// prioritizeHealthyNodes returns a preferred node to run the must-gather pod on, and a fallback node if no preferred node is found.
-func prioritizeHealthyNodes(nodes *corev1.NodeList) (preferred *corev1.Node, fallback *corev1.Node) {
-    var fallbackNode *corev1.Node
-    var latestHeartbeat time.Time
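+// getNodeLastHeartbeatTime returns the LastHeartbeatTime of the node's Ready
+// condition, or nil when the condition is absent or its timestamp is zero.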
+func getNodeLastHeartbeatTime(node corev1.Node) *metav1.Time {
+    for _, cond := range node.Status.Conditions {
+        if cond.Type == corev1.NodeReady {
+            if !cond.LastHeartbeatTime.IsZero() {
+                return utilptr.To[metav1.Time](cond.LastHeartbeatTime)
+            }
+            return nil
+        }
+    }
+    return nil
+}
+
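+// isNodeReadyByCondition reports whether the node has a Ready condition with
+// status True.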
+func isNodeReadyByCondition(node corev1.Node) bool {
+    for _, cond := range node.Status.Conditions {
+        if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue {
+            return true
+        }
+    }
+    return false
+}
+
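+// isNodeReadyAndReachableByTaint reports whether the node carries neither the
+// not-ready nor the unreachable taint.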
+func isNodeReadyAndReachableByTaint(node corev1.Node) bool {
+    for _, taint := range node.Spec.Taints {
+        if taint.Key == unreachableTaintKey || taint.Key == notReadyTaintKey {
+            return false
+        }
+    }
+    return true
+}
+

+// getCandidateNodeNames identifies suitable nodes for constructing a list of
+// node names for a node affinity expression.
+func getCandidateNodeNames(nodes *corev1.NodeList, hasMaster bool) []string {
+    // Identify ready and reachable nodes.
+    var controlPlaneNodes, allControlPlaneNodes, workerNodes, unschedulableNodes, remainingNodes, selectedNodes []corev1.Node
     for _, node := range nodes.Items {
-        var hasUnhealthyTaint bool
-        for _, taint := range node.Spec.Taints {
-            if taint.Key == unreachableTaintKey || taint.Key == notReadyTaintKey {
-                hasUnhealthyTaint = true
-                break
-            }
+        if _, ok := node.Labels[controlPlaneNodeRoleLabel]; ok {
+            allControlPlaneNodes = append(allControlPlaneNodes, node)
+        }
+        if !isNodeReadyByCondition(node) || !isNodeReadyAndReachableByTaint(node) {
+            remainingNodes = append(remainingNodes, node)
+            continue
+        }
+        if node.Spec.Unschedulable {
+            unschedulableNodes = append(unschedulableNodes, node)
+            continue
+        }
+        if _, ok := node.Labels[controlPlaneNodeRoleLabel]; ok {
+            controlPlaneNodes = append(controlPlaneNodes, node)
+        } else {
+            workerNodes = append(workerNodes, node)
         }
+    }

-        // Look at heartbeat time for readiness hint
-        for _, cond := range node.Status.Conditions {
-            if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue {
-                // Check if heartbeat is recent (less than 2m old)
-                if time.Since(cond.LastHeartbeatTime.Time) < 2*time.Minute {
-                    if !hasUnhealthyTaint {
-                        return &node, nil // return immediately on good candidate
-                    }
-                }
-                break
-            }
+    // INFO(ingvagabund): a single target node should be enough. Yet, it might
+    // help to provide more so the scheduler can choose a more suitable node
+    // based on the cluster's scheduling capabilities.
+
+    // hasMaster causes the must-gather pod to set a node selector targeting all
+    // control plane nodes, so there is no point in processing anything other
+    // than control plane nodes.
+    if hasMaster {
+        if len(controlPlaneNodes) > 0 {
+            selectedNodes = controlPlaneNodes
+        } else {
+            selectedNodes = allControlPlaneNodes
+        }
+    } else {
+        // The HyperShift case.
+
+        // Order of preference:
+        // - ready and reachable control plane nodes first
+        // - then ready and reachable worker nodes
+        // - then ready and reachable unschedulable nodes
+        // - then any other node
+        selectedNodes = controlPlaneNodes // very likely empty on HyperShift
+        if len(selectedNodes) == 0 {
+            selectedNodes = workerNodes
+        }
+        if len(selectedNodes) == 0 {
+            // unschedulable nodes might still be better candidates than the
+            // remaining not-ready and unreachable nodes
+            selectedNodes = unschedulableNodes
+        }
+        if len(selectedNodes) == 0 {
+            // whatever is left, as a last resort
+            selectedNodes = remainingNodes
+        }
+    }
+
+    // Sort nodes by the Ready condition's LastHeartbeatTime to prefer nodes
+    // that reported their status most recently.
+    sort.SliceStable(selectedNodes, func(i, j int) bool {
+        iTime := getNodeLastHeartbeatTime(selectedNodes[i])
+        jTime := getNodeLastHeartbeatTime(selectedNodes[j])
+        // Nodes without a heartbeat sort last; two missing heartbeats compare
+        // equal, which keeps the comparator consistent.
+        if jTime == nil {
+            return iTime != nil
+        }
+        if iTime == nil {
+            return false
         }
+        return jTime.Before(iTime)
+    })

-        if fallbackNode == nil || node.CreationTimestamp.Time.After(latestHeartbeat) {
-            fallbackNode = &node
-            latestHeartbeat = node.CreationTimestamp.Time
+    // Limit the list to at most 10 nodes (a limit chosen arbitrarily) to keep
+    // the node affinity expression sane.
+    nodeNames := []string{}
+    var nodeNamesSize = 0
+    for _, n := range selectedNodes {
+        nodeNames = append(nodeNames, n.Name)
+        nodeNamesSize++
+        if nodeNamesSize >= 10 {
+            break
         }
     }

-    return nil, fallbackNode
+    return nodeNames
 }

 // Run creates and runs a must-gather pod
@@ -536,59 +609,25 @@ func (o *MustGatherOptions) Run() error {
         defer cleanupNamespace()
     }

-    nodes, err := o.Client.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
+    // Prefer to run on a master node if there is one, but don't be explicit
+    // otherwise. This lets the command run by default on HyperShift, where
+    // there are no masters.
+    nodes, err := o.Client.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{
+        LabelSelector: o.NodeSelector,
+    })
     if err != nil {
         return err
     }

-    var cpNodes, workerNodes, unschedulableNodes []corev1.Node
-    for _, node := range nodes.Items {
-        isReady := false
-        for _, cond := range node.Status.Conditions {
-            if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue {
-                isReady = true
-                break
-            }
-        }
-        if !isReady {
-            continue
-        }
-        if node.Spec.Unschedulable {
-            unschedulableNodes = append(unschedulableNodes, node)
-            continue
-        }
-        if _, ok := node.Labels["node-role.kubernetes.io/control-plane"]; ok {
-            cpNodes = append(cpNodes, node)
-        } else {
-            workerNodes = append(workerNodes, node)
-        }
-    }
-
-    selectedNodes := cpNodes
-    if len(selectedNodes) == 0 {
-        selectedNodes = workerNodes
-    }
-    if len(selectedNodes) == 0 {
-        selectedNodes = unschedulableNodes
-    }
-
-    var nodeNames []string
-    for _, n := range selectedNodes {
-        nodeNames = append(nodeNames, n.Name)
-    }
-
-    affinity := buildNodeAffinity(nodeNames)
-    // If we have no nodes, we cannot run the must-gather pod.
-
-    // Determine if there is a master node
-    hasMaster := false
+    var hasMaster bool
     for _, node := range nodes.Items {
         if _, ok := node.Labels["node-role.kubernetes.io/master"]; ok {
             hasMaster = true
             break
         }
     }

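+    // Build a preferred-node affinity from at most 10 candidate node names,
+    // ordered by most recent heartbeat.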
+    affinity := buildNodeAffinity(getCandidateNodeNames(nodes, hasMaster))
+
     // ... and create must-gather pod(s)
     var pods []*corev1.Pod
     for _, image := range o.Images {
@@ -1057,31 +1096,6 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity
     cleanedSourceDir := path.Clean(o.SourceDir)
     volumeUsageChecker := fmt.Sprintf(volumeUsageCheckerScript, cleanedSourceDir, cleanedSourceDir, o.VolumePercentage, o.VolumePercentage, executedCommand)

-    // Define taints we do not want to tolerate (can be extended with user input in the future)
-    excludedTaints := []corev1.Taint{
-        {Key: unreachableTaintKey, Effect: corev1.TaintEffectNoExecute},
-        {Key: notReadyTaintKey, Effect: corev1.TaintEffectNoSchedule},
-    }
-
-    // Define candidate tolerations we may want to apply
-    candidateTolerations := []corev1.Toleration{tolerationNotReady}
-    if node == "" && hasMaster {
-        candidateTolerations = append(candidateTolerations, tolerationMasterNoSchedule)
-    }
-
-    // Build the toleration list by only adding tolerations that do NOT tolerate excluded taints
-    filteredTolerations := make([]corev1.Toleration, 0, len(candidateTolerations))
-TolerationLoop:
-    for _, tol := range candidateTolerations {
-        for _, excluded := range excludedTaints {
-            if tol.ToleratesTaint(&excluded) {
-                // Skip this toleration if it tolerates an excluded taint
-                continue TolerationLoop
-            }
-        }
-        filteredTolerations = append(filteredTolerations, tol)
-    }
-
     ret := &corev1.Pod{
         ObjectMeta: metav1.ObjectMeta{
             GenerateName: "must-gather-",