From ea1d58c27e909c3dd460d2f29fa7656ae6e92944 Mon Sep 17 00:00:00 2001 From: Yu Qi Zhang Date: Wed, 13 Jan 2021 14:13:45 -0500 Subject: [PATCH] update.go: add broken symlink check + removal during unit enable If unit enable fails, remove broken symlinks in multi-user.target.wants and try again. This fixes a bug where enables would fail on cluster upgrades with RHEL 7 nodes between 4.6 -> 4.7. Context: before https://github.com/openshift/machine-config-operator/pull/2145, the MCO hard coded a symlink from /etc/system/systemd/$UNIT to /etc/systemd/system/multi-user.target.wants/$UNIT, which is not the case for every unit and thus caused broken symlinks. On RHCOS/FCOS, the systemd version is newer and is able to remove broken symlinks, but on RHEL 7 nodes, it will not first attempt to remove broken symlinks and thus fails the enable. As a workaround, this PR thus attempts to remove broken symlinks when the first enable fails, and then try again. Successful FCOS/RHCOS upgrades should not hit this, and failing ones would report full errors. The error checking is perhaps a bit overkill but the original bug case should only run through this logic once before it is fixed. Future errors are likely actual errors and will be reported as such. Signed-off-by: Yu Qi Zhang --- pkg/daemon/update.go | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index e8470a92ed..fdecdf20ba 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -1432,7 +1432,37 @@ func (dn *Daemon) enableUnits(units []string) error { args := append([]string{"enable"}, units...) stdouterr, err := exec.Command("systemctl", args...).CombinedOutput() if err != nil { - return fmt.Errorf("error enabling unit: %s", stdouterr) + // In RHEL7, the systemd version is too low, so it is unable to handle broken + // symlinks during enable. Do a best-effort removal of potentially broken + // hard coded symlinks and try again. + // See: https://bugzilla.redhat.com/show_bug.cgi?id=1913536 + wantsPathSystemd := "/etc/systemd/system/multi-user.target.wants/" + for _, unit := range units { + unitLinkPath := filepath.Join(wantsPathSystemd, unit) + fi, fiErr := os.Lstat(unitLinkPath) + if fiErr != nil { + if !os.IsNotExist(fiErr) { + return fmt.Errorf("error trying to enable unit, fallback failed with %s (original error %s)", + fiErr, stdouterr) + } + continue + } + if fi.Mode()&os.ModeSymlink == 0 { + return fmt.Errorf("error trying to enable unit, a non-symlink file exists at %s (original error %s)", + unitLinkPath, stdouterr) + } + if _, evalErr := filepath.EvalSymlinks(unitLinkPath); evalErr != nil { + // this is a broken symlink, remove + if rmErr := os.Remove(unitLinkPath); rmErr != nil { + return fmt.Errorf("error trying to enable unit, cannot remove broken symlink: %s (original error %s)", + rmErr, stdouterr) + } + } + } + stdouterr, err := exec.Command("systemctl", args...).CombinedOutput() + if err != nil { + return fmt.Errorf("error enabling unit: %s", stdouterr) + } } glog.Infof("Enabled systemd units: %v", units) return nil