diff --git a/cmd/machine-config-daemon/firstboot_complete_machineconfig.go b/cmd/machine-config-daemon/firstboot_complete_machineconfig.go index 46f078843e..1ff2829943 100644 --- a/cmd/machine-config-daemon/firstboot_complete_machineconfig.go +++ b/cmd/machine-config-daemon/firstboot_complete_machineconfig.go @@ -5,6 +5,8 @@ import ( "fmt" "time" + daemonconsts "github.com/openshift/machine-config-operator/pkg/daemon/constants" + daemon "github.com/openshift/machine-config-operator/pkg/daemon" "github.com/spf13/cobra" "github.com/spf13/pflag" @@ -21,11 +23,14 @@ var firstbootCompleteMachineconfig = &cobra.Command{ var persistNics bool +var machineConfigFile string + // init executes upon import func init() { rootCmd.AddCommand(firstbootCompleteMachineconfig) firstbootCompleteMachineconfig.PersistentFlags().StringVar(&startOpts.rootMount, "root-mount", "/rootfs", "where the nodes root filesystem is mounted for chroot and file manipulation.") firstbootCompleteMachineconfig.PersistentFlags().BoolVar(&persistNics, "persist-nics", false, "Run nmstatectl persist-nic-names") + firstbootCompleteMachineconfig.PersistentFlags().StringVar(&machineConfigFile, "machineconfig-file", daemonconsts.MachineConfigEncapsulatedPath, "The MachineConfig file on-disk to use as the source of truth.") pflag.CommandLine.AddGoFlagSet(flag.CommandLine) } @@ -55,7 +60,7 @@ func runFirstBootCompleteMachineConfig(_ *cobra.Command, _ []string) error { return err } - return dn.RunFirstbootCompleteMachineconfig() + return dn.RunFirstbootCompleteMachineconfig(machineConfigFile) } func executeFirstbootCompleteMachineConfig(cmd *cobra.Command, args []string) { diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index c191f0c26b..19996e807b 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -1207,8 +1207,8 @@ func (dn *Daemon) RunOnceFrom(onceFrom string, skipReboot bool) error { // RunFirstbootCompleteMachineconfig is run via systemd on the first boot // to complete processing of the target MachineConfig. -func (dn *Daemon) RunFirstbootCompleteMachineconfig() error { - data, err := os.ReadFile(constants.MachineConfigEncapsulatedPath) +func (dn *Daemon) RunFirstbootCompleteMachineconfig(machineConfigFile string) error { + data, err := os.ReadFile(machineConfigFile) if err != nil { return err } @@ -1287,9 +1287,18 @@ func (dn *Daemon) RunFirstbootCompleteMachineconfig() error { return err } - // Removing this file signals completion of the initial MC processing. - if err := os.Rename(constants.MachineConfigEncapsulatedPath, constants.MachineConfigEncapsulatedBakPath); err != nil { - return fmt.Errorf("failed to rename encapsulated MachineConfig after processing on firstboot: %w", err) + if machineConfigFile == constants.MachineConfigEncapsulatedPath { + // Removing this file signals completion of the initial MC processing. + if err := os.Rename(constants.MachineConfigEncapsulatedPath, constants.MachineConfigEncapsulatedBakPath); err != nil { + return fmt.Errorf("failed to rename encapsulated MachineConfig after processing on firstboot: %w", err) + } + } + + // If we re-bootstrapped the node, we should disable and remove the systemd + // unit that we used to do that. This function will no-op and return nil if + // the systemd unit is not present. + if err := dn.disableRevertSystemdUnit(); err != nil { + return err } dn.skipReboot = false @@ -2456,16 +2465,6 @@ func (dn *Daemon) triggerUpdate(currentConfig, desiredConfig *mcfgv1.MachineConf return dn.triggerUpdateWithMachineConfig(currentConfig, desiredConfig, true) } - // If the desired image annotation is empty, but the current image is not - // empty, this should be a regular MachineConfig update. - // - // However, the node will not roll back from a layered config to a - // non-layered config without admin intervention; so we should emit an error - // for now. - if desiredImage == "" && currentImage != "" { - return fmt.Errorf("rolling back from a layered to non-layered configuration is not currently supported") - } - // Shut down the Config Drift Monitor since we'll be performing an update // and the config will "drift" while the update is occurring. dn.stopConfigDriftMonitor() diff --git a/pkg/daemon/runtimeassets/README.md b/pkg/daemon/runtimeassets/README.md new file mode 100644 index 0000000000..75361c7394 --- /dev/null +++ b/pkg/daemon/runtimeassets/README.md @@ -0,0 +1,25 @@ +# runtimeassets + +The intent behind this package is to provide a place for assets that the MCD +must render at runtime. Generally speaking, these are used to work around edge +cases where it may not be desirable to have a config file or systemd unit +rendered as part of the normal MachineConfig flow. + +These could be static files or they could be runtime-rendered templates based +upon the status of the MCD and/or objects the MCD can access, such as +ControllerConfig. + +The RuntimeAsset interface specifies that any additional types or files added +to this package should be rendered as Ignition. This will allow the MCD to +write the files to the nodes' filesystem using the standard Ignition file +writing paths that currently exist. + +In the future, items in this package could be expanded and used in the +certificate-writer path as well as other similar paths. + +## RevertService + +This is used for reverting from a layeed MachineConfigPool to a non-layered +MachineConfigPool. This is because the systemd unit that performs this function +should not be part of the default MachineConfig. Instead, it should be rendered +and applied on an as-needed basis. diff --git a/pkg/daemon/runtimeassets/machine-config-daemon-revert.service.yaml b/pkg/daemon/runtimeassets/machine-config-daemon-revert.service.yaml new file mode 100644 index 0000000000..c02b4597c3 --- /dev/null +++ b/pkg/daemon/runtimeassets/machine-config-daemon-revert.service.yaml @@ -0,0 +1,29 @@ +# This systemd unit is intended to help revert from a layered state to a +# non-layered state. It is heavily based upon the +# machine-config-daemon-firstboot.service.yaml found in: +# templates/common/_base/units/machine-config-daemon-firstboot.service.yaml +name: {{ .ServiceName }} +enabled: true +contents: | + [Unit] + Description=Machine Config Daemon Revert + # Make sure it runs only on OSTree booted system + ConditionPathExists=/run/ostree-booted + # Removal of this file signals firstboot completion + ConditionPathExists={{ .RevertServiceMachineConfigFile }} + After=network.target + + [Service] + Type=oneshot + RemainAfterExit=yes + # Disable existing repos (if any) so that OS extensions would use embedded RPMs only + ExecStartPre=-/usr/bin/sh -c "sed -i 's/enabled=1/enabled=0/' /etc/yum.repos.d/*.repo" + # Run this via podman because we want to use the nmstatectl binary in our container + ExecStart=/usr/bin/podman run --authfile=/var/lib/kubelet/config.json --rm --privileged --net=host -v /:/rootfs --entrypoint machine-config-daemon '{{ .MCOImage }}' firstboot-complete-machineconfig --persist-nics --machineconfig-file {{ .RevertServiceMachineConfigFile }} + ExecStart=/usr/bin/podman run --authfile=/var/lib/kubelet/config.json --rm --privileged --pid=host --net=host -v /:/rootfs --entrypoint machine-config-daemon '{{ .MCOImage }}' firstboot-complete-machineconfig --machineconfig-file {{ .RevertServiceMachineConfigFile }} + {{if .Proxy -}} + EnvironmentFile=/etc/mco/proxy.env + {{end -}} + + [Install] + RequiredBy=multi-user.target diff --git a/pkg/daemon/runtimeassets/revertservice.go b/pkg/daemon/runtimeassets/revertservice.go new file mode 100644 index 0000000000..a26089d6bd --- /dev/null +++ b/pkg/daemon/runtimeassets/revertservice.go @@ -0,0 +1,112 @@ +package runtimeassets + +import ( + "bytes" + _ "embed" + "fmt" + "html/template" + + ign3types "github.com/coreos/ignition/v2/config/v3_4/types" + mcfgv1 "github.com/openshift/api/machineconfiguration/v1" + ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" + "sigs.k8s.io/yaml" +) + +var _ RuntimeAsset = &revertService{} + +const ( + RevertServiceName string = "machine-config-daemon-revert.service" + RevertServiceMachineConfigFile string = "/etc/mco/machineconfig-revert.json" +) + +//go:embed machine-config-daemon-revert.service.yaml +var mcdRevertServiceIgnYAML string + +type revertService struct { + // The MCO image pullspec that should be used. + MCOImage string + // Whether the proxy file exists and should be considered. + Proxy bool +} + +// Constructs a revertService instance from a ControllerConfig. Returns an +// error if the provided ControllerConfig cannot be used. +func NewRevertService(ctrlcfg *mcfgv1.ControllerConfig) (RuntimeAsset, error) { + mcoImage, ok := ctrlcfg.Spec.Images["machineConfigOperator"] + if !ok { + return nil, fmt.Errorf("controllerconfig Images does not have machineConfigOperator image") + } + + if mcoImage == "" { + return nil, fmt.Errorf("controllerconfig Images has machineConfigOperator but it is empty") + } + + hasProxy := false + if ctrlcfg.Spec.Proxy != nil { + hasProxy = true + } + + return &revertService{ + MCOImage: mcoImage, + Proxy: hasProxy, + }, nil +} + +// Returns an Ignition config containing the +// machine-config-daemon-revert.service systemd unit. +func (r *revertService) Ignition() (*ign3types.Config, error) { + rendered, err := r.render() + if err != nil { + return nil, err + } + + out := &ign3types.Unit{} + + if err := yaml.Unmarshal(rendered, out); err != nil { + return nil, err + } + + ignConfig := ctrlcommon.NewIgnConfig() + ignConfig.Systemd = ign3types.Systemd{ + Units: []ign3types.Unit{ + *out, + }, + } + + return &ignConfig, nil +} + +// Renders the embedded template with the provided values. +func (r *revertService) render() ([]byte, error) { + if r.MCOImage == "" { + return nil, fmt.Errorf("MCOImage field must be provided") + } + + tmpl := template.New(RevertServiceName) + + tmpl, err := tmpl.Parse(mcdRevertServiceIgnYAML) + if err != nil { + return nil, err + } + + // Golang templates must be rendered using exported fields. However, we want + // to manage the exported field for this service, so we create an anonymous + // struct which embeds RevertService and the ServiceName before we render + // the template. + data := struct { + ServiceName string + RevertServiceMachineConfigFile string + revertService + }{ + ServiceName: RevertServiceName, + RevertServiceMachineConfigFile: RevertServiceMachineConfigFile, + revertService: *r, + } + + buf := bytes.NewBuffer([]byte{}) + if err := tmpl.Execute(buf, data); err != nil { + return nil, err + } + + return buf.Bytes(), nil +} diff --git a/pkg/daemon/runtimeassets/revertservice_test.go b/pkg/daemon/runtimeassets/revertservice_test.go new file mode 100644 index 0000000000..e71fe46839 --- /dev/null +++ b/pkg/daemon/runtimeassets/revertservice_test.go @@ -0,0 +1,96 @@ +package runtimeassets + +import ( + "fmt" + "testing" + + configv1 "github.com/openshift/api/config/v1" + mcfgv1 "github.com/openshift/api/machineconfiguration/v1" + "github.com/stretchr/testify/assert" +) + +func TestRevertService(t *testing.T) { + mcoImagePullspec := "mco.image.pullspec" + + proxyContents := "EnvironmentFile=/etc/mco/proxy.env" + + alwaysExpectedContents := []string{ + fmt.Sprintf("ConditionPathExists=%s", RevertServiceMachineConfigFile), + fmt.Sprintf("podman run --authfile=/var/lib/kubelet/config.json --rm --privileged --net=host -v /:/rootfs --entrypoint machine-config-daemon '%s' firstboot-complete-machineconfig --persist-nics --machineconfig-file %s", mcoImagePullspec, RevertServiceMachineConfigFile), + fmt.Sprintf("podman run --authfile=/var/lib/kubelet/config.json --rm --privileged --pid=host --net=host -v /:/rootfs --entrypoint machine-config-daemon '%s' firstboot-complete-machineconfig --machineconfig-file %s", mcoImagePullspec, RevertServiceMachineConfigFile), + } + + testCases := []struct { + name string + ctrlcfg *mcfgv1.ControllerConfig + expectedContents []string + unexpectedContents []string + errExpected bool + }{ + { + name: "no proxy", + ctrlcfg: &mcfgv1.ControllerConfig{ + Spec: mcfgv1.ControllerConfigSpec{ + Images: map[string]string{ + "machineConfigOperator": mcoImagePullspec, + }, + }, + }, + expectedContents: alwaysExpectedContents, + unexpectedContents: []string{ + proxyContents, + }, + }, + { + name: "with proxy", + ctrlcfg: &mcfgv1.ControllerConfig{ + Spec: mcfgv1.ControllerConfigSpec{ + Proxy: &configv1.ProxyStatus{}, + Images: map[string]string{ + "machineConfigOperator": mcoImagePullspec, + }, + }, + }, + expectedContents: append(alwaysExpectedContents, proxyContents), + }, + { + name: "no mco image found", + ctrlcfg: &mcfgv1.ControllerConfig{}, + errExpected: true, + }, + } + + for _, testCase := range testCases { + testCase := testCase + t.Run(testCase.name, func(t *testing.T) { + t.Parallel() + + rs, err := NewRevertService(testCase.ctrlcfg) + if testCase.errExpected { + assert.Error(t, err) + assert.Nil(t, rs) + return + } else { + assert.NoError(t, err) + assert.NotNil(t, rs) + } + + ign, err := rs.Ignition() + assert.NoError(t, err) + assert.NotNil(t, ign) + + unit := ign.Systemd.Units[0] + + for _, item := range testCase.expectedContents { + assert.Contains(t, *unit.Contents, item) + } + + for _, item := range testCase.unexpectedContents { + assert.NotContains(t, *unit.Contents, item) + } + + assert.Equal(t, unit.Name, RevertServiceName) + assert.True(t, *unit.Enabled) + }) + } +} diff --git a/pkg/daemon/runtimeassets/runtimeassets.go b/pkg/daemon/runtimeassets/runtimeassets.go new file mode 100644 index 0000000000..bbdceb5061 --- /dev/null +++ b/pkg/daemon/runtimeassets/runtimeassets.go @@ -0,0 +1,9 @@ +package runtimeassets + +import ( + ign3types "github.com/coreos/ignition/v2/config/v3_4/types" +) + +type RuntimeAsset interface { + Ignition() (*ign3types.Config, error) +} diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index 72d0365d01..83ef3a1d12 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -40,6 +40,7 @@ import ( "github.com/openshift/machine-config-operator/pkg/daemon/constants" pivottypes "github.com/openshift/machine-config-operator/pkg/daemon/pivot/types" pivotutils "github.com/openshift/machine-config-operator/pkg/daemon/pivot/utils" + "github.com/openshift/machine-config-operator/pkg/daemon/runtimeassets" "github.com/openshift/machine-config-operator/pkg/upgrademonitor" ) @@ -856,7 +857,13 @@ func (dn *Daemon) updateOnClusterBuild(oldConfig, newConfig *mcfgv1.MachineConfi // If the new image pullspec is already on disk, do not attempt to re-apply // it. rpm-ostree will throw an error as a result. // See: https://issues.redhat.com/browse/OCPBUGS-18414. - if oldImage != newImage && newImage != "" { + if oldImage != newImage { + // If the new image field is empty, set it to the OS image URL value + // provided by the MachineConfig to do a rollback. + if newImage == "" { + klog.Infof("%s empty, reverting to osImageURL %s from MachineConfig %s", constants.DesiredImageAnnotationKey, newConfig.Spec.OSImageURL, newConfig.Name) + newImage = newConfig.Spec.OSImageURL + } if err := dn.updateLayeredOSToPullspec(newImage); err != nil { return err } @@ -864,6 +871,13 @@ func (dn *Daemon) updateOnClusterBuild(oldConfig, newConfig *mcfgv1.MachineConfi klog.Infof("Image pullspecs equal, skipping rpm-ostree rebase") } + // If the new OS image equals the OS image URL value, this means we're in a + // revert-from-layering situation. This also means we can return early after + // taking a different path. + if newImage == newConfig.Spec.OSImageURL { + return dn.finalizeRevertToNonLayering(newConfig) + } + // update files on disk that need updating if err := dn.updateFiles(oldIgnConfig, newIgnConfig, skipCertificateWrite); err != nil { return err @@ -951,6 +965,60 @@ func (dn *Daemon) updateOnClusterBuild(oldConfig, newConfig *mcfgv1.MachineConfi return dn.reboot(fmt.Sprintf("Node will reboot into image %s / MachineConfig %s", newImage, newConfigName)) } +// Finalizes the revert process by enabling a special systemd unit prior to +// rebooting the node. +// +// After we write the original factory image to the node, none of the files +// specified in the MachineConfig will be written due to how rpm-ostree handles +// file writes. Because those files are owned by the layered container image, +// they are not present after reboot; even if we were to write them to the node +// before rebooting. Consequently, after reverting back to the original image, +// the node will lose contact with the control plane and the easiest way to +// reestablish contact is to rebootstrap the node. +// +// By comparison, if we write a file that is _not_ owned by the layered +// container image, this file will persist after reboot. So what we do is write +// a special systemd unit that will rebootstrap the node upon reboot. +// Unfortunately, this will incur a second reboot during the rollback process, +// so there is room for improvement here. +func (dn *Daemon) finalizeRevertToNonLayering(newConfig *mcfgv1.MachineConfig) error { + // First, we write the new MachineConfig to a file. This is both the signal + // that the revert systemd unit should fire as well as the desired source of + // truth. + outBytes, err := json.Marshal(newConfig) + if err != nil { + return fmt.Errorf("could not marshal MachineConfig %q to JSON: %w", newConfig.Name, err) + } + + if err := writeFileAtomicallyWithDefaults(runtimeassets.RevertServiceMachineConfigFile, outBytes); err != nil { + return fmt.Errorf("could not write MachineConfig %q to %q: %w", newConfig.Name, runtimeassets.RevertServiceMachineConfigFile, err) + } + + klog.Infof("Wrote MachineConfig %q to %q", newConfig.Name, runtimeassets.RevertServiceMachineConfigFile) + + // Next, we enable the revert systemd unit. This renders and writes the + // machine-config-daemon-revert.service systemd unit, clones it, and writes + // it to disk. The reason for doing it this way is because it will persist + // after the reboot since it was not written or mutated by the rpm-ostree + // process. + if err := dn.enableRevertSystemdUnit(); err != nil { + return err + } + + // Clear the current image field so that after reboot, the node will clear + // the currentImage annotation. + odc := &onDiskConfig{ + currentImage: "", + currentConfig: newConfig, + } + + if err := dn.storeCurrentConfigOnDisk(odc); err != nil { + return err + } + + return dn.reboot(fmt.Sprintf("Node will reboot into image %s / MachineConfig %s", newConfig.Spec.OSImageURL, newConfig.Name)) +} + // Update the node to the provided node configuration. // This function should be de-duped with dn.updateHypershift() and // dn.updateOnClusterBuild(). See: https://issues.redhat.com/browse/MCO-810 for @@ -2825,3 +2893,76 @@ func (dn *Daemon) hasImageRegistryDrainOverrideConfigMap() (bool, error) { return false, fmt.Errorf("Error fetching image registry drain override configmap: %w", err) } + +// Enables the revert layering systemd unit. +// +// To enable the unit, we perform the following operations: +// 1. Retrieve the ControllerConfig. +// 2. Generate the Ignition config from the ControllerConfig. +// 3. Writes the new systemd unit to disk and enables it. +func (dn *Daemon) enableRevertSystemdUnit() error { + ctrlcfg, err := dn.ccLister.Get(ctrlcommon.ControllerConfigName) + if err != nil { + return fmt.Errorf("could not get controllerconfig %s: %w", ctrlcommon.ControllerConfigName, err) + } + + revertService, err := runtimeassets.NewRevertService(ctrlcfg) + if err != nil { + return err + } + + revertIgn, err := revertService.Ignition() + if err != nil { + return fmt.Errorf("could not create %s: %w", runtimeassets.RevertServiceName, err) + } + + if err := dn.writeUnits(revertIgn.Systemd.Units); err != nil { + return fmt.Errorf("could not write %s: %w", runtimeassets.RevertServiceName, err) + } + + return nil +} + +// Disables the revert layering systemd unit, if it is present. +// +// To disable the unit, it performs the following operations: +// 1. Checks for the presence of the systemd unit file. If not present, it will +// no-op. +// 2. If the unit file is present, it will disable the unit using the default +// MCD code paths for that purpose. +// 3. It will ensure that the unit file is removed as well as the file that the +// Ignition config was written to. +func (dn *Daemon) disableRevertSystemdUnit() error { + unitPath := filepath.Join(pathSystemd, runtimeassets.RevertServiceName) + + unitPathExists, err := fileExists(unitPath) + if err != nil { + return fmt.Errorf("could not determine if service %q exists: %w", runtimeassets.RevertServiceName, err) + } + + // If the unit path does not exist, there is nothing left to do. + if !unitPathExists { + return nil + } + + // If we've reached this point, we know that the unit file is still present, + // which means that the unit may still be enabled. + if err := dn.disableUnits([]string{runtimeassets.RevertServiceName}); err != nil { + return err + } + + filesToRemove := []string{ + unitPath, + runtimeassets.RevertServiceMachineConfigFile, + } + + // systemd removes the unit file, but there is no harm in calling + // os.RemoveAll() since it will return nil if the file does not exist. + for _, fileToRemove := range filesToRemove { + if err := os.RemoveAll(fileToRemove); err != nil { + return err + } + } + + return nil +} diff --git a/test/e2e-techpreview/onclusterlayering_test.go b/test/e2e-techpreview/onclusterlayering_test.go index 193a8115d8..0216813c1b 100644 --- a/test/e2e-techpreview/onclusterlayering_test.go +++ b/test/e2e-techpreview/onclusterlayering_test.go @@ -15,6 +15,7 @@ import ( mcfgv1 "github.com/openshift/api/machineconfiguration/v1" mcfgv1alpha1 "github.com/openshift/api/machineconfiguration/v1alpha1" + "github.com/openshift/machine-config-operator/pkg/daemon/runtimeassets" "github.com/openshift/machine-config-operator/test/framework" "github.com/openshift/machine-config-operator/test/helpers" "github.com/stretchr/testify/assert" @@ -119,14 +120,31 @@ func TestOnClusterBuildRollsOutImage(t *testing.T) { cs := framework.NewClientSet("") node := helpers.GetRandomNode(t, cs, "worker") - t.Cleanup(makeIdempotentAndRegister(t, func() { - helpers.DeleteNodeAndMachine(t, cs, node) - })) - - helpers.LabelNode(t, cs, node, helpers.MCPNameToRole(layeredMCPName)) + unlabelFunc := makeIdempotentAndRegister(t, helpers.LabelNode(t, cs, node, helpers.MCPNameToRole(layeredMCPName))) helpers.WaitForNodeImageChange(t, cs, node, imagePullspec) + helpers.AssertNodeBootedIntoImage(t, cs, node, imagePullspec) + t.Logf("Node %s is booted into image %q", node.Name, imagePullspec) + t.Log(helpers.ExecCmdOnNode(t, cs, node, "chroot", "/rootfs", "cowsay", "Moo!")) + + unlabelFunc() + + assertNodeRevertsToNonLayered(t, cs, node) +} + +func assertNodeRevertsToNonLayered(t *testing.T, cs *framework.ClientSet, node corev1.Node) { + workerMCName := helpers.GetMcName(t, cs, "worker") + workerMC, err := cs.MachineConfigs().Get(context.TODO(), workerMCName, metav1.GetOptions{}) + require.NoError(t, err) + + helpers.WaitForNodeConfigAndImageChange(t, cs, node, workerMCName, "") + + helpers.AssertNodeBootedIntoImage(t, cs, node, workerMC.Spec.OSImageURL) + t.Logf("Node %s has reverted to OS image %q", node.Name, workerMC.Spec.OSImageURL) + + helpers.AssertFileNotOnNode(t, cs, node, filepath.Join("/etc/systemd/system", runtimeassets.RevertServiceName)) + helpers.AssertFileNotOnNode(t, cs, node, runtimeassets.RevertServiceMachineConfigFile) } // This test extracts the /etc/yum.repos.d and /etc/pki/rpm-gpg content from a diff --git a/test/e2e/osimageurl_override_test.go b/test/e2e/osimageurl_override_test.go index 99f16937e7..7682aa63ba 100644 --- a/test/e2e/osimageurl_override_test.go +++ b/test/e2e/osimageurl_override_test.go @@ -1,14 +1,12 @@ package e2e import ( - "fmt" "os" "testing" ign3types "github.com/coreos/ignition/v2/config/v3_4/types" "github.com/openshift/machine-config-operator/test/framework" "github.com/openshift/machine-config-operator/test/helpers" - "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" ) @@ -76,12 +74,8 @@ func assertNodeDoesNotHaveBinaries(t *testing.T, cs *framework.ClientSet, node c // Creates a new MachineConfigPool, adds the given node to it, and overrides // the osImageURL with the provided OS image name. func applyCustomOSToNode(t *testing.T, cs *framework.ClientSet, node corev1.Node, osImageURL, poolName string) func() { - getRpmOstreeStatus := func() string { - return helpers.ExecCmdOnNode(t, cs, node, "chroot", "/rootfs", "rpm-ostree", "status") - } - // Do a pre-run assertion to ensure that we are not in the new OS image. - assert.NotContains(t, getRpmOstreeStatus(), osImageURL, fmt.Sprintf("node %q already booted into %q", node.Name, osImageURL)) + helpers.AssertNodeNotBootedIntoImage(t, cs, node, osImageURL) mc := helpers.NewMachineConfig("custom-os-image", helpers.MCLabelForRole(poolName), osImageURL, []ign3types.File{}) @@ -90,7 +84,7 @@ func applyCustomOSToNode(t *testing.T, cs *framework.ClientSet, node corev1.Node undoFunc := helpers.CreatePoolAndApplyMCToNode(t, cs, poolName, node, mc) // Assert that we've booted into the new custom OS image. - assert.Contains(t, getRpmOstreeStatus(), osImageURL, fmt.Sprintf("node %q did not boot into %q", node.Name, osImageURL)) + helpers.AssertNodeBootedIntoImage(t, cs, node, osImageURL) t.Logf("Node %q has booted into %q", node.Name, osImageURL) @@ -99,7 +93,7 @@ func applyCustomOSToNode(t *testing.T, cs *framework.ClientSet, node corev1.Node undoFunc() // Assert that rpm-ostree indicates we're not running the custom OS image anymore. - assert.NotContains(t, getRpmOstreeStatus(), osImageURL, fmt.Sprintf("node %q did not roll back to previous OS image", node.Name)) + helpers.AssertNodeNotBootedIntoImage(t, cs, node, osImageURL) t.Logf("Node %q has returned to its previous OS image", node.Name) }) diff --git a/test/helpers/utils.go b/test/helpers/utils.go index 55d45ad85b..21d2f3fd06 100644 --- a/test/helpers/utils.go +++ b/test/helpers/utils.go @@ -4,6 +4,7 @@ import ( "bytes" "compress/gzip" "context" + "encoding/json" "fmt" "log" "math/rand" @@ -44,6 +45,7 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" + rpmostreeclient "github.com/coreos/rpmostree-client-go/pkg/client" opv1 "github.com/openshift/api/operator/v1" mcoac "github.com/openshift/client-go/operator/applyconfigurations/operator/v1" ) @@ -831,6 +833,72 @@ func ExecCmdOnNode(t *testing.T, cs *framework.ClientSet, node corev1.Node, subA return string(out) } +// Gets the rpm-ostree status for a given node. +func GetRPMOStreeStatusForNode(t *testing.T, cs *framework.ClientSet, node corev1.Node) *rpmostreeclient.Status { + status, err := getRPMOStreeStatusForNode(cs, node) + require.NoError(t, err) + + return status +} + +// Internal-only version of GetRPMOStreeStatusForNode() +func getRPMOStreeStatusForNode(cs *framework.ClientSet, node corev1.Node) (*rpmostreeclient.Status, error) { + cmd, err := execCmdOnNode(cs, node, "chroot", "/rootfs", "rpm-ostree", "status", "--json") + if err != nil { + return nil, fmt.Errorf("could not construct command: %w", err) + } + + output, err := cmd.CombinedOutput() + if err != nil { + return nil, fmt.Errorf("could not execute command %s on node %s: %w", cmd.String(), node.Name, err) + } + + status := &rpmostreeclient.Status{} + + if err := json.Unmarshal(output, status); err != nil { + return nil, fmt.Errorf("could not parse rpm-ostree status for node %s: %w", node.Name, err) + } + + return status, nil +} + +// Gets the currently booted rpmostree deployment for a given node. +func getBootedRPMOstreeDeployment(cs *framework.ClientSet, node corev1.Node) (*rpmostreeclient.Deployment, error) { + status, err := getRPMOStreeStatusForNode(cs, node) + if err != nil { + return nil, fmt.Errorf("could not get rpm-ostree status for node %s: %w", node.Name, err) + } + + for _, deployment := range status.Deployments { + deployment := deployment + if deployment.Booted { + return &deployment, nil + } + } + + return nil, fmt.Errorf("no booted deployments found on node %s", node.Name) +} + +// Asserts that a given node is booted into a specific image. This works by +// examining the rpm-ostree status and determining if the currently booted +// deployment is the expected image. +func AssertNodeBootedIntoImage(t *testing.T, cs *framework.ClientSet, node corev1.Node, expectedImagePullspec string) bool { + deployment, err := getBootedRPMOstreeDeployment(cs, node) + require.NoError(t, err) + + return assert.Contains(t, deployment.ContainerImageReference, expectedImagePullspec, "expected node %q to be booted into image %q, got %q", node.Name, expectedImagePullspec, deployment.ContainerImageReference) +} + +// Asserts that a given node is not booted into a specific image. This works by +// examining the rpm-ostree status and determining if the currently booted +// deployment is the expected image. +func AssertNodeNotBootedIntoImage(t *testing.T, cs *framework.ClientSet, node corev1.Node, unexpectedImagePullspec string) bool { + deployment, err := getBootedRPMOstreeDeployment(cs, node) + require.NoError(t, err) + + return assert.NotContains(t, deployment.ContainerImageReference, unexpectedImagePullspec, "expected node %q not to be booted into %q", node.Name, unexpectedImagePullspec) +} + // ExecCmdOnNodeWithError behaves like ExecCmdOnNode, with the exception that // any errors are returned to the caller for inspection. This allows one to // execute a command that is expected to fail; e.g., stat /nonexistant/file.