diff --git a/pkg/cli/admin/mustgather/mustgather.go b/pkg/cli/admin/mustgather/mustgather.go index 9ad0ee5e64..4b7fb83d89 100644 --- a/pkg/cli/admin/mustgather/mustgather.go +++ b/pkg/cli/admin/mustgather/mustgather.go @@ -9,6 +9,7 @@ import ( "os" "path" "regexp" + "sort" "strconv" "strings" "sync" @@ -38,6 +39,7 @@ import ( "k8s.io/kubectl/pkg/util/templates" admissionapi "k8s.io/pod-security-admission/api" "k8s.io/utils/exec" + utilptr "k8s.io/utils/ptr" configclient "github.com/openshift/client-go/config/clientset/versioned" imagev1client "github.com/openshift/client-go/image/clientset/versioned/typed/image/v1" @@ -50,7 +52,10 @@ import ( ) const ( - gatherContainerName = "gather" + gatherContainerName = "gather" + unreachableTaintKey = "node.kubernetes.io/unreachable" + notReadyTaintKey = "node.kubernetes.io/not-ready" + controlPlaneNodeRoleLabel = "node-role.kubernetes.io/control-plane" ) var ( @@ -98,6 +103,30 @@ sleep 5 done` ) +// buildNodeAffinity builds a node affinity from the provided node hostnames. +func buildNodeAffinity(nodeHostnames []string) *corev1.Affinity { + if len(nodeHostnames) == 0 { + return nil + } + return &corev1.Affinity{ + NodeAffinity: &corev1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ + NodeSelectorTerms: []corev1.NodeSelectorTerm{ + { + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: "kubernetes.io/hostname", + Operator: corev1.NodeSelectorOpIn, + Values: nodeHostnames, + }, + }, + }, + }, + }, + }, + } +} + const ( // number of concurrent must-gather Pods to run if --all-images or multiple --image are provided concurrentMG = 4 @@ -403,6 +432,125 @@ func (o *MustGatherOptions) Validate() error { return nil } +func getNodeLastHeartbeatTime(node corev1.Node) *metav1.Time { + for _, cond := range node.Status.Conditions { + if cond.Type == corev1.NodeReady { + if !cond.LastHeartbeatTime.IsZero() { + return utilptr.To[metav1.Time](cond.LastHeartbeatTime) + } + return nil + } + } + return nil +} + +func isNodeReadyByCondition(node corev1.Node) bool { + for _, cond := range node.Status.Conditions { + if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue { + return true + } + } + return false +} + +func isNodeReadyAndReachableByTaint(node corev1.Node) bool { + for _, taint := range node.Spec.Taints { + if taint.Key == unreachableTaintKey || taint.Key == notReadyTaintKey { + return false + } + } + return true +} + +// getCandidateNodeNames identifies suitable nodes for constructing a list of +// node names for a node affinity expression. +func getCandidateNodeNames(nodes *corev1.NodeList, hasMaster bool) []string { + // Identify ready and reacheable nodes + var controlPlaneNodes, allControlPlaneNodes, workerNodes, unschedulableNodes, remainingNodes, selectedNodes []corev1.Node + for _, node := range nodes.Items { + if _, ok := node.Labels[controlPlaneNodeRoleLabel]; ok { + allControlPlaneNodes = append(allControlPlaneNodes, node) + } + if !isNodeReadyByCondition(node) || !isNodeReadyAndReachableByTaint(node) { + remainingNodes = append(remainingNodes, node) + continue + } + if node.Spec.Unschedulable { + unschedulableNodes = append(unschedulableNodes, node) + continue + } + if _, ok := node.Labels[controlPlaneNodeRoleLabel]; ok { + controlPlaneNodes = append(controlPlaneNodes, node) + } else { + workerNodes = append(workerNodes, node) + } + } + + // INFO(ingvagabund): a single target node should be enough. Yet, + // it might help to provide more to allow the scheduler choose a more + // suitable node based on the cluster scheduling capabilities. + + // hasMaster cause the must-gather pod to set a node selector targeting all + // control plane node. So there's no point of processing other than control + // plane nodes + if hasMaster { + if len(controlPlaneNodes) > 0 { + selectedNodes = controlPlaneNodes + } else { + selectedNodes = allControlPlaneNodes + } + } else { + // For hypershift case + + // Order of preference: + // - ready and reachable control plane nodes first + // - then ready and reachable worker nodes + // - then ready and reachable unschedulable nodes + // - then any other node + selectedNodes = controlPlaneNodes // this will be very likely empty for hypershift + if len(selectedNodes) == 0 { + selectedNodes = workerNodes + } + if len(selectedNodes) == 0 { + // unschedulable nodes might still be better candidates than the remaining + // not ready and not reacheable nodes + selectedNodes = unschedulableNodes + } + if len(selectedNodes) == 0 { + // whatever is left for the last resort + selectedNodes = remainingNodes + } + } + + // Sort nodes based on the cond.LastHeartbeatTime.Time to prefer nodes that + // reported their status most recently + sort.SliceStable(selectedNodes, func(i, j int) bool { + iTime := getNodeLastHeartbeatTime(selectedNodes[i]) + jTime := getNodeLastHeartbeatTime(selectedNodes[j]) + // iTime has no effect since all is sorted right + if jTime == nil { + return true + } + if iTime == nil { + return false + } + return jTime.Before(iTime) + }) + + // Limit the number of nodes to 10 at most to provide a sane list of nodes + nodeNames := []string{} + var nodeNamesSize = 0 + for _, n := range selectedNodes { + nodeNames = append(nodeNames, n.Name) + nodeNamesSize++ + if nodeNamesSize >= 10 { + break + } + } + + return nodeNames +} + // Run creates and runs a must-gather pod func (o *MustGatherOptions) Run() error { var errs []error @@ -475,6 +623,8 @@ func (o *MustGatherOptions) Run() error { } } + affinity := buildNodeAffinity(getCandidateNodeNames(nodes, hasMaster)) + // ... and create must-gather pod(s) var pods []*corev1.Pod for _, image := range o.Images { @@ -496,7 +646,7 @@ func (o *MustGatherOptions) Run() error { return err } for _, node := range nodes.Items { - pods = append(pods, o.newPod(node.Name, image, hasMaster)) + pods = append(pods, o.newPod(node.Name, image, hasMaster, affinity)) } } else { if o.NodeName != "" { @@ -506,7 +656,7 @@ func (o *MustGatherOptions) Run() error { return err } } - pods = append(pods, o.newPod(o.NodeName, image, hasMaster)) + pods = append(pods, o.newPod(o.NodeName, image, hasMaster, affinity)) } } @@ -924,7 +1074,8 @@ func newClusterRoleBinding(ns *corev1.Namespace) *rbacv1.ClusterRoleBinding { // newPod creates a pod with 2 containers with a shared volume mount: // - gather: init containers that run gather command // - copy: no-op container we can exec into -func (o *MustGatherOptions) newPod(node, image string, hasMaster bool) *corev1.Pod { +func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity *corev1.Affinity) *corev1.Pod { + zero := int64(0) nodeSelector := map[string]string{ @@ -1024,6 +1175,7 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool) *corev1.P Operator: "Exists", }, }, + Affinity: affinity, }, } if o.HostNetwork { diff --git a/pkg/cli/admin/mustgather/mustgather_test.go b/pkg/cli/admin/mustgather/mustgather_test.go index 5f848b93c4..db56c50322 100644 --- a/pkg/cli/admin/mustgather/mustgather_test.go +++ b/pkg/cli/admin/mustgather/mustgather_test.go @@ -2,8 +2,10 @@ package mustgather import ( "context" + "fmt" "reflect" "testing" + "time" corev1 "k8s.io/api/core/v1" k8sapierrors "k8s.io/apimachinery/pkg/api/errors" @@ -239,3 +241,201 @@ func TestGetNamespace(t *testing.T) { }) } } + +func buildTestNode(name string, apply func(*corev1.Node)) *corev1.Node { + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + SelfLink: fmt.Sprintf("/api/v1/nodes/%s", name), + Labels: map[string]string{}, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionTrue}, + }, + }, + } + if apply != nil { + apply(node) + } + return node +} + +func buildTestControlPlaneNode(name string, apply func(*corev1.Node)) *corev1.Node { + node := buildTestNode(name, apply) + node.ObjectMeta.Labels[controlPlaneNodeRoleLabel] = "" + return node +} + +func TestGetCandidateNodeNames(t *testing.T) { + tests := []struct { + description string + nodeList *corev1.NodeList + hasMaster bool + nodeNames []string + }{ + { + description: "No nodes are ready and reacheable (hasMaster=true)", + nodeList: &corev1.NodeList{}, + hasMaster: true, + nodeNames: []string{}, + }, + { + description: "No nodes are ready and reacheable (hasMaster=false)", + nodeList: &corev1.NodeList{}, + hasMaster: false, + nodeNames: []string{}, + }, + { + description: "Control plane nodes are ready and reacheable", + nodeList: &corev1.NodeList{ + Items: []corev1.Node{ + *buildTestControlPlaneNode("controlplane1", nil), + *buildTestControlPlaneNode("controlplane2", nil), + }, + }, + hasMaster: true, + nodeNames: []string{"controlplane2", "controlplane1"}, + }, + { + description: "Some control plane nodes are not ready or reacheable", + nodeList: &corev1.NodeList{ + Items: []corev1.Node{ + *buildTestControlPlaneNode("controlplane1", func(node *corev1.Node) { + node.Status.Conditions = []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionFalse}, + } + }), + *buildTestControlPlaneNode("controlplane2", nil), + *buildTestControlPlaneNode("controlplane3", func(node *corev1.Node) { + node.Spec.Taints = []corev1.Taint{ + { + Key: unreachableTaintKey, + }, + } + }), + *buildTestControlPlaneNode("controlplane4", func(node *corev1.Node) { + node.Spec.Taints = []corev1.Taint{ + { + Key: notReadyTaintKey, + }, + } + }), + }, + }, + hasMaster: true, + nodeNames: []string{"controlplane2"}, + }, + { + description: "Mix of control plane and worker nodes (at least one reachable and ready control plane node)", + nodeList: &corev1.NodeList{ + Items: []corev1.Node{ + *buildTestControlPlaneNode("controlplane1", func(node *corev1.Node) { + node.Status.Conditions = []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionFalse}, + } + }), + *buildTestControlPlaneNode("controlplane2", nil), + *buildTestNode("controlplane3", nil), + }, + }, + hasMaster: true, + nodeNames: []string{"controlplane2"}, + }, + { + description: "Mix of control plane and worker nodes (no ready and reachable control plane node)", + nodeList: &corev1.NodeList{ + Items: []corev1.Node{ + *buildTestControlPlaneNode("controlplane1", func(node *corev1.Node) { + node.Status.Conditions = []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionFalse}, + } + }), + *buildTestControlPlaneNode("controlplane2", func(node *corev1.Node) { + node.Spec.Taints = []corev1.Taint{ + { + Key: unreachableTaintKey, + }, + } + }), + *buildTestControlPlaneNode("controlplane3", func(node *corev1.Node) { + node.Spec.Taints = []corev1.Taint{ + { + Key: notReadyTaintKey, + }, + } + }), + *buildTestControlPlaneNode("worker1", nil), + }, + }, + hasMaster: true, + nodeNames: []string{"worker1"}, + }, + { + description: "Mix of control plane and worker nodes (no ready and not reachable nodes with unschedulable)", + nodeList: &corev1.NodeList{ + Items: []corev1.Node{ + *buildTestControlPlaneNode("controlplane1", func(node *corev1.Node) { + node.Status.Conditions = []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionFalse}, + } + }), + *buildTestControlPlaneNode("controlplane2", func(node *corev1.Node) { + node.Spec.Taints = []corev1.Taint{ + { + Key: unreachableTaintKey, + }, + } + }), + *buildTestControlPlaneNode("controlplane3", func(node *corev1.Node) { + node.Spec.Taints = []corev1.Taint{ + { + Key: notReadyTaintKey, + }, + } + }), + *buildTestControlPlaneNode("worker1", func(node *corev1.Node) { + node.Spec.Unschedulable = true + }), + }, + }, + nodeNames: []string{"worker1"}, + }, + { + description: "No ready and reachable nodes (nodes with the recent teartbeat time are sorted left)", + nodeList: &corev1.NodeList{ + Items: []corev1.Node{ + *buildTestControlPlaneNode("controlplane1", func(node *corev1.Node) { + node.Status.Conditions = []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionFalse}, + } + }), + *buildTestControlPlaneNode("controlplane2", func(node *corev1.Node) { + node.Status.Conditions = []corev1.NodeCondition{} + }), + *buildTestNode("worker1", func(node *corev1.Node) { + node.Status.Conditions = []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionFalse, LastHeartbeatTime: metav1.Time{Time: time.Date(2025, time.July, 10, 10, 20, 0, 0, time.UTC)}}, + } + }), + *buildTestNode("other1", func(node *corev1.Node) { + node.Status.Conditions = []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionFalse, LastHeartbeatTime: metav1.Time{Time: time.Date(2025, time.July, 10, 10, 30, 0, 0, time.UTC)}}, + } + }), + }, + }, + hasMaster: false, + nodeNames: []string{"other1", "worker1", "controlplane2", "controlplane1"}, + }, + } + + for _, test := range tests { + t.Run(test.description, func(t *testing.T) { + names := getCandidateNodeNames(test.nodeList, test.hasMaster) + if !reflect.DeepEqual(test.nodeNames, names) { + t.Fatalf("Expected and computed list of node names differ. Expected %#v. Got %#v.", test.nodeNames, names) + } + }) + } +}