160 changes: 156 additions & 4 deletions pkg/cli/admin/mustgather/mustgather.go
@@ -9,6 +9,7 @@ import (
"os"
"path"
"regexp"
"sort"
"strconv"
"strings"
"sync"
@@ -38,6 +39,7 @@ import (
"k8s.io/kubectl/pkg/util/templates"
admissionapi "k8s.io/pod-security-admission/api"
"k8s.io/utils/exec"
utilptr "k8s.io/utils/ptr"

configclient "github.com/openshift/client-go/config/clientset/versioned"
imagev1client "github.com/openshift/client-go/image/clientset/versioned/typed/image/v1"
@@ -50,7 +52,10 @@ import (
)

const (
gatherContainerName = "gather"
gatherContainerName = "gather"
unreachableTaintKey = "node.kubernetes.io/unreachable"
notReadyTaintKey = "node.kubernetes.io/not-ready"
controlPlaneNodeRoleLabel = "node-role.kubernetes.io/control-plane"
)

var (
@@ -98,6 +103,30 @@ sleep 5
done`
)

// buildNodeAffinity builds a node affinity from the provided node hostnames.
func buildNodeAffinity(nodeHostnames []string) *corev1.Affinity {
if len(nodeHostnames) == 0 {
return nil
}
return &corev1.Affinity{
NodeAffinity: &corev1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
NodeSelectorTerms: []corev1.NodeSelectorTerm{
{
MatchExpressions: []corev1.NodeSelectorRequirement{
{
Key: "kubernetes.io/hostname",
Operator: corev1.NodeSelectorOpIn,
Values: nodeHostnames,
},
},
},
},
},
},
}
}
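
// Illustrative usage (a hedged sketch, not part of this change; the hostnames
// below are hypothetical):
//
//	affinity := buildNodeAffinity([]string{"worker-0", "worker-1"})
//	// affinity now restricts scheduling to nodes whose kubernetes.io/hostname
//	// label is "worker-0" or "worker-1"; an empty input yields nil, leaving
//	// the pod unconstrained.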

const (
// number of concurrent must-gather Pods to run if --all-images or multiple --image are provided
concurrentMG = 4
@@ -403,6 +432,125 @@ func (o *MustGatherOptions) Validate() error {
return nil
}

func getNodeLastHeartbeatTime(node corev1.Node) *metav1.Time {
for _, cond := range node.Status.Conditions {
if cond.Type == corev1.NodeReady {
if !cond.LastHeartbeatTime.IsZero() {
return utilptr.To[metav1.Time](cond.LastHeartbeatTime)
}
return nil
}
}
return nil
}

func isNodeReadyByCondition(node corev1.Node) bool {
for _, cond := range node.Status.Conditions {
if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue {
return true
}
}
return false
}

func isNodeReadyAndReachableByTaint(node corev1.Node) bool {
for _, taint := range node.Spec.Taints {
if taint.Key == unreachableTaintKey || taint.Key == notReadyTaintKey {
return false
}
}
return true
}

// getCandidateNodeNames identifies suitable nodes for constructing a list of
// node names for a node affinity expression.
func getCandidateNodeNames(nodes *corev1.NodeList, hasMaster bool) []string {
// Identify ready and reachable nodes
var controlPlaneNodes, allControlPlaneNodes, workerNodes, unschedulableNodes, remainingNodes, selectedNodes []corev1.Node
for _, node := range nodes.Items {
if _, ok := node.Labels[controlPlaneNodeRoleLabel]; ok {
allControlPlaneNodes = append(allControlPlaneNodes, node)
}
if !isNodeReadyByCondition(node) || !isNodeReadyAndReachableByTaint(node) {
remainingNodes = append(remainingNodes, node)
continue
}
if node.Spec.Unschedulable {
unschedulableNodes = append(unschedulableNodes, node)
continue
}
if _, ok := node.Labels[controlPlaneNodeRoleLabel]; ok {
controlPlaneNodes = append(controlPlaneNodes, node)
} else {
workerNodes = append(workerNodes, node)
}
}

// INFO(ingvagabund): a single target node should be enough. Yet,
// it might help to provide more to allow the scheduler to choose a more
// suitable node based on the cluster scheduling capabilities.

// hasMaster causes the must-gather pod to set a node selector targeting all
// control plane nodes, so there is no point in processing anything other
// than control plane nodes
if hasMaster {
if len(controlPlaneNodes) > 0 {
selectedNodes = controlPlaneNodes
} else {
selectedNodes = allControlPlaneNodes
}
} else {
// For the HyperShift case

// Order of preference:
// - ready and reachable control plane nodes first
// - then ready and reachable worker nodes
// - then ready and reachable unschedulable nodes
// - then any other node
selectedNodes = controlPlaneNodes // this will very likely be empty for HyperShift
if len(selectedNodes) == 0 {
selectedNodes = workerNodes
}
if len(selectedNodes) == 0 {
// unschedulable nodes might still be better candidates than the remaining
// not-ready and unreachable nodes
selectedNodes = unschedulableNodes
}
if len(selectedNodes) == 0 {
// whatever is left, as a last resort
selectedNodes = remainingNodes
}
}

// Sort nodes based on the cond.LastHeartbeatTime.Time to prefer nodes that
// reported their status most recently
sort.SliceStable(selectedNodes, func(i, j int) bool {
iTime := getNodeLastHeartbeatTime(selectedNodes[i])
jTime := getNodeLastHeartbeatTime(selectedNodes[j])
// nodes without a heartbeat sort last; otherwise the most recent
// heartbeat wins
if iTime == nil {
return false
}
if jTime == nil {
return true
}
return jTime.Before(iTime)
})

// Limit the list to at most 10 nodes to keep the affinity expression sane
nodeNames := []string{}
for _, n := range selectedNodes {
nodeNames = append(nodeNames, n.Name)
if len(nodeNames) >= 10 {
break
}
}

return nodeNames
}
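
// Illustrative sketch (hypothetical fixtures, not part of this change): with
// hasMaster=false, a ready worker wins over a cordoned or NotReady one, since
// ready and reachable workers take precedence over unschedulable and remaining
// nodes:
//
//	names := getCandidateNodeNames(&corev1.NodeList{
//		Items: []corev1.Node{cordonedWorker, readyWorker, notReadyWorker},
//	}, false)
//	// names == []string{readyWorker.Name}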

// Run creates and runs a must-gather pod
func (o *MustGatherOptions) Run() error {
var errs []error
@@ -475,6 +623,8 @@ func (o *MustGatherOptions) Run() error {
}
}

affinity := buildNodeAffinity(getCandidateNodeNames(nodes, hasMaster))

// ... and create must-gather pod(s)
var pods []*corev1.Pod
for _, image := range o.Images {
@@ -496,7 +646,7 @@
return err
}
for _, node := range nodes.Items {
pods = append(pods, o.newPod(node.Name, image, hasMaster))
pods = append(pods, o.newPod(node.Name, image, hasMaster, affinity))
}
} else {
if o.NodeName != "" {
@@ -506,7 +656,7 @@
return err
}
}
pods = append(pods, o.newPod(o.NodeName, image, hasMaster))
pods = append(pods, o.newPod(o.NodeName, image, hasMaster, affinity))
}
}

@@ -924,7 +1074,8 @@ func newClusterRoleBinding(ns *corev1.Namespace) *rbacv1.ClusterRoleBinding {
// newPod creates a pod with 2 containers with a shared volume mount:
// - gather: init containers that run gather command
// - copy: no-op container we can exec into
func (o *MustGatherOptions) newPod(node, image string, hasMaster bool) *corev1.Pod {
func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity *corev1.Affinity) *corev1.Pod {

zero := int64(0)

nodeSelector := map[string]string{
@@ -1024,6 +1175,7 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool) *corev1.P
Operator: "Exists",
},
},
Affinity: affinity,
},
}
if o.HostNetwork {