diff --git a/pkg/controller/common/layered_node_state.go b/pkg/controller/common/layered_node_state.go
index 3876359b30..bc2e413cd5 100644
--- a/pkg/controller/common/layered_node_state.go
+++ b/pkg/controller/common/layered_node_state.go
@@ -59,6 +59,10 @@ func (l *LayeredNodeState) isDesiredImageEqualToBuild(mosc *mcfgv1alpha1.Machine
 	return l.isImageAnnotationEqualToBuild(daemonconsts.DesiredImageAnnotationKey, mosc)
 }
 
+func (l *LayeredNodeState) IsCurrentImageEqualToBuild(mosc *mcfgv1alpha1.MachineOSConfig) bool {
+	return l.isImageAnnotationEqualToBuild(daemonconsts.CurrentImageAnnotationKey, mosc)
+}
+
 func (l *LayeredNodeState) isDesiredMachineConfigEqualToBuild(mosb *mcfgv1alpha1.MachineOSBuild) bool {
 	return l.node.Annotations[daemonconsts.DesiredMachineConfigAnnotationKey] == mosb.Spec.DesiredConfig.Name
 }
diff --git a/pkg/controller/node/node_controller.go b/pkg/controller/node/node_controller.go
index 829f73134f..810dd41e01 100644
--- a/pkg/controller/node/node_controller.go
+++ b/pkg/controller/node/node_controller.go
@@ -625,7 +625,7 @@ func (ctrl *Controller) updateNode(old, cur interface{}) {
 
 	// Specifically log when a node has completed an update so the MCC logs are a useful central aggregate of state changes
 	if oldNode.Annotations[daemonconsts.CurrentMachineConfigAnnotationKey] != oldNode.Annotations[daemonconsts.DesiredMachineConfigAnnotationKey] &&
-		isNodeDone(curNode) {
+		isNodeDone(curNode, false) {
 		ctrl.logPoolNode(pool, curNode, "Completed update to %s", curNode.Annotations[daemonconsts.DesiredMachineConfigAnnotationKey])
 		changed = true
 	} else {
@@ -1010,10 +1010,10 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error {
 		}
 
 		klog.V(4).Infof("Continuing updates for layered pool %s", pool.Name)
+	} else {
+		klog.V(4).Infof("Pool %s is not layered", pool.Name)
 	}
 
-	klog.V(4).Infof("Pool %s is not layered", pool.Name)
-
 	nodes, err := ctrl.getNodesForPool(pool)
 	if err != nil {
 		if syncErr := ctrl.syncStatusOnly(pool); syncErr != nil {
@@ -1166,7 +1166,7 @@ func (ctrl *Controller) updateCandidateNode(mosc *mcfgv1alpha1.MachineOSConfig,
 	if mosb == nil {
 		if lns.IsDesiredEqualToPool(pool, layered) {
 			// If the node's desired annotations match the pool, return directly without updating the node.
-			klog.Infof("no update is needed")
+			klog.V(4).Infof("Pool %s: node %s: no update is needed", pool.Name, nodeName)
 			return nil
 		}
 
@@ -1175,7 +1175,7 @@ func (ctrl *Controller) updateCandidateNode(mosc *mcfgv1alpha1.MachineOSConfig,
 	} else {
 		if lns.IsDesiredEqualToBuild(mosc, mosb) {
 			// If the node's desired annotations match the pool, return directly without updating the node.
-			klog.Infof("no update is needed")
+			klog.V(4).Infof("Pool %s: node %s: no update is needed", pool.Name, nodeName)
 			return nil
 		}
 		// ensure this is happening. it might not be.
@@ -1207,7 +1207,7 @@ func (ctrl *Controller) updateCandidateNode(mosc *mcfgv1alpha1.MachineOSConfig,
 func getAllCandidateMachines(layered bool, config *mcfgv1alpha1.MachineOSConfig, build *mcfgv1alpha1.MachineOSBuild, pool *mcfgv1.MachineConfigPool, nodesInPool []*corev1.Node, maxUnavailable int) ([]*corev1.Node, uint) {
 	unavail := getUnavailableMachines(nodesInPool, pool, layered, build)
 	if len(unavail) >= maxUnavailable {
-		klog.Infof("No nodes available for updates")
+		klog.V(4).Infof("Pool %s: No nodes available for updates", pool.Name)
 		return nil, 0
 	}
 	capacity := maxUnavailable - len(unavail)
@@ -1226,7 +1226,7 @@ func getAllCandidateMachines(layered bool, config *mcfgv1alpha1.MachineOSConfig,
 		} else {
 			if lns.IsDesiredEqualToBuild(config, build) {
 				// If the node's desired annotations match the pool, return directly without updating the node.
-				klog.Infof("no update is needed")
+				klog.V(4).Infof("Pool %s: layered node %s: no update is needed", pool.Name, node.Name)
 				continue
 			}
 		}
@@ -1235,6 +1235,7 @@ func getAllCandidateMachines(layered bool, config *mcfgv1alpha1.MachineOSConfig,
 			klog.V(4).Infof("node %s skipped during candidate selection as it is currently unscheduled", node.Name)
 			continue
 		}
+		klog.V(4).Infof("Pool %s: selected candidate node %s", pool.Name, node.Name)
 		nodes = append(nodes, node)
 	}
 	// Nodes which are failing to target this config also count against
diff --git a/pkg/controller/node/node_controller_test.go b/pkg/controller/node/node_controller_test.go
index e1e9db62fb..4e77fb0fc4 100644
--- a/pkg/controller/node/node_controller_test.go
+++ b/pkg/controller/node/node_controller_test.go
@@ -725,9 +725,9 @@ func TestGetCandidateMachines(t *testing.T) {
 			helpers.NewNodeBuilder("node-7").WithEqualConfigsAndImages(machineConfigV1, imageV1).WithNodeReady().Node(),
 			helpers.NewNodeBuilder("node-8").WithEqualConfigsAndImages(machineConfigV1, imageV1).WithNodeReady().Node(),
 		},
-		expected:        []string{"node-4", "node-5"},
-		otherCandidates: []string{"node-6"},
-		capacity:        2,
+		expected:        []string{"node-4"},
+		otherCandidates: []string{"node-5", "node-6"},
+		capacity:        1,
 		layeredPool:     true,
 	}, {
 		// Targets https://issues.redhat.com/browse/OCPBUGS-24705.
@@ -752,9 +752,9 @@ func TestGetCandidateMachines(t *testing.T) {
 				WithMCDState(daemonconsts.MachineConfigDaemonStateDone).
 				Node(),
 		},
-		expected:        nil,
-		otherCandidates: nil,
-		capacity:        0,
+		expected:        []string{"node-1"},
+		otherCandidates: []string{"node-2"},
+		capacity:        1,
 		layeredPool:     true,
 	}, {
 		// Targets https://issues.redhat.com/browse/OCPBUGS-24705.
@@ -799,11 +799,15 @@ func TestGetCandidateMachines(t *testing.T) {
 			pool := pb.MachineConfigPool()
 
 			// TODO: Double check that mosb, mosc should be nil here and layered should be false
+			// NOTE: By doing this, we end up ignoring all the layered checks (only MachineConfig diffs will be
+			// used to determine whether a node is available and ready for an update). This will need to be reworked.
 			got := getCandidateMachines(pool, nil, nil, test.nodes, test.progress, false)
 			nodeNames := getNamesFromNodes(got)
 			assert.Equal(t, test.expected, nodeNames)
 
 			// TODO: Double check that mosb, mosc should be nil here and layered should be false
+			// NOTE: By doing this, we end up ignoring all the layered checks (only MachineConfig diffs will be
+			// used to determine whether a node is available and ready for an update). This will need to be reworked.
 			allCandidates, capacity := getAllCandidateMachines(false, nil, nil, pool, test.nodes, test.progress)
 			assert.Equal(t, test.capacity, capacity)
 			var otherCandidates []string
diff --git a/pkg/controller/node/status.go b/pkg/controller/node/status.go
index 742fbfc1cf..15f384379d 100644
--- a/pkg/controller/node/status.go
+++ b/pkg/controller/node/status.go
@@ -348,7 +348,7 @@ func isNodeManaged(node *corev1.Node) bool {
 }
 
 // isNodeDone returns true if the current == desired and the MCD has marked done.
-func isNodeDone(node *corev1.Node) bool {
+func isNodeDone(node *corev1.Node, layered bool) bool {
 	if node.Annotations == nil {
 		return false
 	}
@@ -361,12 +361,37 @@ func isNodeDone(node *corev1.Node) bool {
 		return false
 	}
 
+	if layered {
+		// The MachineConfig annotations are loaded on boot-up by the daemon, which
+		// isn't currently done for the image annotations, so the comparisons here
+		// are a bit more nuanced.
+		cimage, cok := node.Annotations[daemonconsts.CurrentImageAnnotationKey]
+		dimage, dok := node.Annotations[daemonconsts.DesiredImageAnnotationKey]
+
+		// If the desired image is not set but the pool is layered, this node can
+		// be considered ready for an update: this is the very first time the node
+		// is being opted into layering.
+		if !dok {
+			return true
+		}
+
+		// If we're here, we know that a desired image annotation exists.
+		// If the current image annotation does not exist, the node is not
+		// "done", as it is doing its very first update as a layered node.
+		if !cok {
+			return false
+		}
+
+		// The current image annotation exists; compare it with the desired
+		// values to determine whether the node is done.
+		return cconfig == dconfig && cimage == dimage && isNodeMCDState(node, daemonconsts.MachineConfigDaemonStateDone)
+	}
+
 	return cconfig == dconfig && isNodeMCDState(node, daemonconsts.MachineConfigDaemonStateDone)
 }
 
 // isNodeDoneAt checks whether a node is fully updated to a targetConfig
-func isNodeDoneAt(node *corev1.Node, pool *mcfgv1.MachineConfigPool) bool {
-	return isNodeDone(node) && node.Annotations[daemonconsts.CurrentMachineConfigAnnotationKey] == pool.Spec.Configuration.Name
+func isNodeDoneAt(node *corev1.Node, pool *mcfgv1.MachineConfigPool, layered bool) bool {
+	return isNodeDone(node, layered) && node.Annotations[daemonconsts.CurrentMachineConfigAnnotationKey] == pool.Spec.Configuration.Name
 }
 
 // isNodeMCDState checks the MCD state against the state parameter
@@ -397,7 +422,8 @@ func getUpdatedMachines(pool *mcfgv1.MachineConfigPool, nodes []*corev1.Node, mo
 		lns := ctrlcommon.NewLayeredNodeState(node)
 		if mosb != nil && mosc != nil {
 			mosbState := ctrlcommon.NewMachineOSBuildState(mosb)
-			if layered && mosbState.IsBuildSuccess() && mosb.Spec.DesiredConfig.Name == pool.Spec.Configuration.Name {
+			// Pool image annotations no longer appear to be used, so node-specific checks are required here.
+			if layered && mosbState.IsBuildSuccess() && mosb.Spec.DesiredConfig.Name == pool.Spec.Configuration.Name && isNodeDoneAt(node, pool, layered) && lns.IsCurrentImageEqualToBuild(mosc) {
 				updated = append(updated, node)
 			}
 		} else if lns.IsDoneAt(pool, layered) {
@@ -450,13 +476,13 @@ func isNodeReady(node *corev1.Node) bool {
 
 // isNodeUnavailable is a helper function for getUnavailableMachines
 // See the docs of getUnavailableMachines for more info
-func isNodeUnavailable(node *corev1.Node) bool {
+func isNodeUnavailable(node *corev1.Node, layered bool) bool {
 	// Unready nodes are unavailable
 	if !isNodeReady(node) {
 		return true
 	}
-	// Ready nodes are not unavailable
-	if isNodeDone(node) {
+	// Done nodes are available; a node still working towards a new image/MC is not
+	if isNodeDone(node, layered) {
 		return false
 	}
 	// Now we know the node isn't ready - the current config must not
@@ -480,17 +506,13 @@ func getUnavailableMachines(nodes []*corev1.Node, pool *mcfgv1.MachineConfigPool
 		if mosb != nil {
 			mosbState := ctrlcommon.NewMachineOSBuildState(mosb)
 			// if node is unavail, desiredConfigs match, and the build is a success, then we are unavail.
 			// not sure on this one honestly
-			if layered && isNodeUnavailable(node) && mosb.Spec.DesiredConfig.Name == pool.Status.Configuration.Name && mosbState.IsBuildSuccess() {
-				unavail = append(unavail, node)
-			}
-		} else {
-			lns := ctrlcommon.NewLayeredNodeState(node)
-			if lns.IsUnavailable(pool, layered) {
+			if layered && isNodeUnavailable(node, layered) && mosb.Spec.DesiredConfig.Name == pool.Spec.Configuration.Name && mosbState.IsBuildSuccess() {
 				unavail = append(unavail, node)
 			}
+		} else if isNodeUnavailable(node, layered) {
+			unavail = append(unavail, node)
 		}
 	}
-
 	return unavail
 }
diff --git a/pkg/controller/node/status_test.go b/pkg/controller/node/status_test.go
index e5fb5fe750..a25599e97c 100644
--- a/pkg/controller/node/status_test.go
+++ b/pkg/controller/node/status_test.go
@@ -493,7 +493,7 @@ func TestGetUnavailableMachines(t *testing.T) {
 			helpers.NewNodeBuilder("node-3").WithEqualConfigs(machineConfigV0).WithNodeNotReady().Node(),
 			helpers.NewNodeBuilder("node-4").WithEqualConfigs(machineConfigV0).WithNodeReady().Node(),
 		},
-		unavail:              []string{"node-0", "node-2"},
+		unavail:              []string{"node-0", "node-2", "node-3"},
 		layeredPoolWithImage: true,
 	}, {
 		name: "Mismatched unlayered node and layered pool with image unavailable",
@@ -502,9 +502,9 @@ func TestGetUnavailableMachines(t *testing.T) {
 			helpers.NewNodeBuilder("node-1").WithEqualConfigsAndImages(machineConfigV1, imageV1).Node(),
 			helpers.NewNodeBuilder("node-2").WithEqualConfigsAndImages(machineConfigV1, imageV1).WithNodeNotReady().Node(),
 			helpers.NewNodeBuilder("node-3").WithEqualConfigs(machineConfigV0).WithNodeNotReady().Node(),
-			helpers.NewNodeBuilder("node-4").WithEqualConfigs(machineConfigV0).WithNodeReady().Node(),
+			helpers.NewNodeBuilder("node-4").WithEqualConfigsAndImages(machineConfigV0, imageV1).WithNodeReady().Node(),
 		},
-		unavail:     []string{"node-3"},
+		unavail:     []string{"node-0", "node-2", "node-3"},
 		layeredPool: true,
 	}, {
 		name: "Mismatched layered node and unlayered pool",
@@ -515,7 +515,7 @@ func TestGetUnavailableMachines(t *testing.T) {
 			helpers.NewNodeBuilder("node-3").WithEqualConfigs(machineConfigV0).WithEqualImages(imageV1).WithNodeNotReady().Node(),
 			helpers.NewNodeBuilder("node-4").WithEqualConfigs(machineConfigV0).WithEqualImages(imageV1).WithNodeReady().Node(),
 		},
-		unavail: []string{"node-0"},
+		unavail: []string{"node-0", "node-2", "node-3"},
 	}, {
 		// Targets https://issues.redhat.com/browse/OCPBUGS-24705.
 		name: "nodes working toward layered should not be considered available",
@@ -550,11 +550,13 @@ func TestGetUnavailableMachines(t *testing.T) {
 			// Need to set WithNodeReady() on all nodes to avoid short-circuiting.
 			helpers.NewNodeBuilder("node-0").
 				WithEqualConfigs(machineConfigV0).
+				WithDesiredImage(imageV0).WithCurrentImage(imageV0).
 				WithMCDState(daemonconsts.MachineConfigDaemonStateDone).
 				WithNodeReady().
 				Node(),
 			helpers.NewNodeBuilder("node-1").
 				WithEqualConfigs(machineConfigV0).
+				WithDesiredImage(imageV0).WithCurrentImage(imageV0).
 				WithMCDState(daemonconsts.MachineConfigDaemonStateDone).
 				WithNodeReady().
 				Node(),
@@ -566,11 +568,12 @@ func TestGetUnavailableMachines(t *testing.T) {
 				Node(),
 			helpers.NewNodeBuilder("node-3").
 				WithEqualConfigs(machineConfigV0).
-				WithDesiredImage(imageV1).WithCurrentImage("").
+				WithDesiredImage(imageV1).WithCurrentImage(imageV0).
 				WithMCDState(daemonconsts.MachineConfigDaemonStateDone).
 				WithNodeReady().
 				Node(),
 		},
+		layeredPool:          true,
 		layeredPoolWithImage: true,
 		unavail:              []string{"node-2", "node-3"},
 	},
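
For reference, the core behavioral change above is the layered branch added to isNodeDone in status.go. Below is a minimal standalone sketch of that annotation comparison, not part of the patch: it substitutes a bare annotation map for *corev1.Node and an mcdDone boolean for the isNodeMCDState helper, and the inlined annotation key strings are assumed to mirror the daemonconsts values rather than taken from this diff.

package main

import "fmt"

const (
	currentConfigKey = "machineconfiguration.openshift.io/currentConfig"
	desiredConfigKey = "machineconfiguration.openshift.io/desiredConfig"
	currentImageKey  = "machineconfiguration.openshift.io/currentImage"
	desiredImageKey  = "machineconfiguration.openshift.io/desiredImage"
)

// isLayeredNodeDone mirrors the layered branch of isNodeDone: annos stands in
// for the node's annotations, mcdDone for the MCD "Done" state check.
func isLayeredNodeDone(annos map[string]string, mcdDone bool) bool {
	cconfig := annos[currentConfigKey]
	dconfig := annos[desiredConfigKey]
	cimage, cok := annos[currentImageKey]
	dimage, dok := annos[desiredImageKey]

	// No desired image yet: the node is only now being opted into layering,
	// so it counts as done (ready to be targeted for an update).
	if !dok {
		return true
	}
	// Desired image set but no current image: the node is mid-way through
	// its very first layered update, so it is not done.
	if !cok {
		return false
	}
	// Both image annotations exist: configs and images must both match.
	return cconfig == dconfig && cimage == dimage && mcdDone
}

func main() {
	fmt.Println(isLayeredNodeDone(map[string]string{
		currentConfigKey: "mc-v1", desiredConfigKey: "mc-v1",
		currentImageKey: "image-v1", desiredImageKey: "image-v1",
	}, true)) // true: a fully reconciled layered node
}

The asymmetry is deliberate per the comments in the patch: a missing desired image short-circuits to true, while a missing current image short-circuits to false, because the daemon only populates the image annotations after the node's first layered update, unlike the MachineConfig annotations, which are loaded at boot.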