Skip to content

Commit

Permalink
OCPBUGS-3009: Prune stale CCRs before aggregating scan results
Browse files Browse the repository at this point in the history
Previously, the compliance operator would leave CCRs around and then
just overwrite them on subsequent scans. While the most recent scan data
was accurate, because it was overwriting existing check results, it gave
the impression that some changes weren't taking effect.

For example, if you create a tailored profile, run a scan, exclude a
rule, and rerun the scan, it appears the change you just made never took
effect because the result from the rule you ignored still exists.

To avoid this, let's prune stale results when we aggregate new results.
  • Loading branch information
rhmdnd committed Nov 10, 2023
1 parent a1ce852 commit f043d19
Show file tree
Hide file tree
Showing 6 changed files with 217 additions and 9 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ Versioning](https://semver.org/spec/v2.0.0.html).

- The operator now parses links from the compliance content and renders it in
custom resources accordingly.
- `ComplianceCheckResult` objects are automatically cleaned up on each scan.
This prevents the [illusion](https://issues.redhat.com/browse/OCPBUGS-3009)
that changes to profiles, like excluding rules, aren't taking effect because
stale `ComplianceCheckResult` objects are leftover from previous runs.

- The operator have the ability to hide warnings for certain failed to fetched
resources, this is useful when the user does not want to see the warnings
Expand Down
48 changes: 48 additions & 0 deletions cmd/manager/aggregator.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/version"
Expand Down Expand Up @@ -557,6 +558,33 @@ func createResults(crClient aggregatorCrClient, scan *compv1alpha1.ComplianceSca
// aggregator need to know it's using gRPC under the hood, probably
// not).

// Find all the existing scan results. As we iterate through the list
// of the most recent results below, we should remove entries from the
// list of existing results. By the end of the loop, we should have a
// list of results that are effectively stale. For example, checks that
// have been disabled through a tailored profile. If we don't clean
// these up during the aggregation phase, it will look like they're
// still valid, even though they're from an old scan.
staleComplianceCheckResults := make(map[string]compv1alpha1.ComplianceCheckResult)
complianceCheckResults := compv1alpha1.ComplianceCheckResultList{}
withLabel := map[string]string{
compv1alpha1.ComplianceScanLabel: scan.Name,
}
lo := runtimeclient.ListOptions{
Namespace: scan.Namespace,
LabelSelector: labels.SelectorFromSet(withLabel),
}
err := crClient.getClient().List(context.TODO(), &complianceCheckResults, &lo)
if err != nil {
return fmt.Errorf("Unable to fetch existing ComplianceCheckResultList: %w", err)
}
for _, r := range complianceCheckResults.Items {
// Use a map so that we can find specific
// ComplianceCheckResults without iterating over the list for
// every new result from the latest scan.
staleComplianceCheckResults[r.Name] = r
}

for _, pr := range consistentResults {
if pr == nil || pr.CheckResult == nil {
cmdLog.Info("nil result or result.check, this shouldn't happen")
Expand Down Expand Up @@ -592,6 +620,14 @@ func createResults(crClient aggregatorCrClient, scan *compv1alpha1.ComplianceSca
if err := createOrUpdateOneResult(crClient, scan, checkResultLabels, checkResultAnnotations, checkResultExists, pr.CheckResult); err != nil {
return fmt.Errorf("cannot create or update checkResult %s: %v", pr.CheckResult.Name, err)
}

// Remove the ComplianceCheckResult from the list of stale
// results so we don't delete it later.
_, ok := staleComplianceCheckResults[foundCheckResult.Name]
if ok {
delete(staleComplianceCheckResults, foundCheckResult.Name)
}

// Handle forwarding.
f.SendComplianceCheckResult(pr.CheckResult)

Expand All @@ -615,6 +651,18 @@ func createResults(crClient aggregatorCrClient, scan *compv1alpha1.ComplianceSca
}
}

// If there are any ComplianceCheckResults left in
// staleComplianceCheckResults, they were from previous scans and we
// should delete them. Otherwise, we give users the impression changes
// they've made to their scans, profiles, or settings haven't taken
// effect.
for _, result := range staleComplianceCheckResults {
err := crClient.getClient().Delete(context.TODO(), &result)
if err != nil {
return fmt.Errorf("Unable to delete stale ComplianceCheckResult %s: %w", result.Name, err)
}
}

return nil
}

Expand Down
2 changes: 2 additions & 0 deletions config/rbac/remediation_aggregator_role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,10 @@ rules:
verbs:
- create
- get
- list
- update
- patch
- delete
- apiGroups:
- compliance.openshift.io
resources:
Expand Down
38 changes: 38 additions & 0 deletions tests/e2e/framework/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -1647,6 +1647,26 @@ func (f *Framework) AssertCheckRemediation(name, namespace string, shouldHaveRem
return nil
}

func (f *Framework) AssertRemediationExists(name, namespace string) error {
var r compv1alpha1.ComplianceRemediation
err := f.Client.Get(context.TODO(), types.NamespacedName{Name: name, Namespace: namespace}, &r)
if err != nil {
return fmt.Errorf("Failed to assert ComplianceRemediation %s exists: %w", name, err)
}
return nil
}

func (f *Framework) AssertRemediationDoesNotExists(name, namespace string) error {
var r compv1alpha1.ComplianceRemediation
err := f.Client.Get(context.TODO(), types.NamespacedName{Name: name, Namespace: namespace}, &r)
if apierrors.IsNotFound(err) {
return nil
} else if err != nil {
return fmt.Errorf("Failed to assert ComplianceRemediation %s does not exist: %w", name, err)
}
return fmt.Errorf("Failed to assert ComplianceRemediation %s does not exist.", name)
}

func (f *Framework) TaintNode(node *core.Node, taint core.Taint) error {
taintedNode := node.DeepCopy()
if taintedNode.Spec.Taints == nil {
Expand Down Expand Up @@ -2334,3 +2354,21 @@ func (f *Framework) AssertCronJobIsNotSuspended(name string) error {
log.Printf("CronJob %s is active", name)
return nil
}

func (f *Framework) AssertScanDoesNotContainCheck(scanName, checkName, namespace string) error {
var getCheck compv1alpha1.ComplianceCheckResult
err := f.Client.Get(context.TODO(), types.NamespacedName{Name: checkName, Namespace: namespace}, &getCheck)
if apierrors.IsNotFound(err) {
// The check doesn't exist at all, which means it also doesn't exist in *any* scans
log.Printf("didn't find ComplianceCheckResult %s", checkName)
return nil
} else if err != nil {
return err
}
// Make sure the check we found actually belongs to the scan we're given. This is extra
// validation in case the same check is used across other scans.
if getCheck.Labels[compv1alpha1.ComplianceScanLabel] == scanName {
return fmt.Errorf("found ComplianceCheckResult %s in ComplianceScan %s", checkName, scanName)
}
return nil
}
113 changes: 113 additions & 0 deletions tests/e2e/parallel/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2855,5 +2855,118 @@ func TestRuleHasProfileAnnotation(t *testing.T) {
t.Fatalf("expected to find profile %s in rule %s", profileName, rule.Name)
}
}
}

func TestScanCleansUpComplianceCheckResults(t *testing.T) {
f := framework.Global
t.Parallel()

tpName := framework.GetObjNameFromTest(t)
bindingName := tpName + "-binding"

// create a tailored profile
tp := &compv1alpha1.TailoredProfile{
ObjectMeta: metav1.ObjectMeta{
Name: tpName,
Namespace: f.OperatorNamespace,
},
Spec: compv1alpha1.TailoredProfileSpec{
Title: tpName,
Description: tpName,
Extends: "ocp4-cis",
},
}

err := f.Client.Create(context.TODO(), tp, nil)
if err != nil {
t.Fatal(err)
}
defer f.Client.Delete(context.TODO(), tp)

// run a scan
ssb := compv1alpha1.ScanSettingBinding{
ObjectMeta: metav1.ObjectMeta{
Name: bindingName,
Namespace: f.OperatorNamespace,
},
Profiles: []compv1alpha1.NamedObjectReference{
{
Name: tpName,
Kind: "TailoredProfile",
APIGroup: "compliance.openshift.io/v1alpha1",
},
},
SettingsRef: &compv1alpha1.NamedObjectReference{
Name: "default",
Kind: "ScanSetting",
APIGroup: "compliance.openshift.io/v1alpha1",
},
}
err = f.Client.Create(context.TODO(), &ssb, nil)
if err != nil {
t.Fatal(err)
}
defer f.Client.Delete(context.TODO(), &ssb)

if err := f.WaitForSuiteScansStatus(f.OperatorNamespace, bindingName, compv1alpha1.PhaseDone, compv1alpha1.ResultNonCompliant); err != nil {
t.Fatal(err)
}

// verify a compliance check result exists
checkName := tpName + "-audit-profile-set"
checkResult := compv1alpha1.ComplianceCheckResult{
ObjectMeta: metav1.ObjectMeta{
Name: checkName,
Namespace: f.OperatorNamespace,
},
ID: "xccdf_org.ssgproject.content_rule_audit_profile_set",
Status: compv1alpha1.CheckResultFail,
Severity: compv1alpha1.CheckResultSeverityMedium,
}
err = f.AssertHasCheck(bindingName, tpName, checkResult)
if err != nil {
t.Fatal(err)
}
if err := f.AssertRemediationExists(checkName, f.OperatorNamespace); err != nil {
t.Fatal(err)
}

// update tailored profile to exclude the rule before we kick off another run
tpGet := &compv1alpha1.TailoredProfile{}
err = f.Client.Get(context.TODO(), types.NamespacedName{Name: tpName, Namespace: f.OperatorNamespace}, tpGet)
if err != nil {
t.Fatal(err)
}

tpUpdate := tpGet.DeepCopy()
ruleName := "ocp4-audit-profile-set"
tpUpdate.Spec.DisableRules = []compv1alpha1.RuleReferenceSpec{
{
Name: ruleName,
Rationale: "testing to ensure scan results are cleaned up",
},
}

err = f.Client.Update(context.TODO(), tpUpdate)
if err != nil {
t.Fatal(err)
}

// rerun the scan
err = f.ReRunScan(tpName, f.OperatorNamespace)
if err != nil {
t.Fatal(err)
}
if err := f.WaitForSuiteScansStatus(f.OperatorNamespace, bindingName, compv1alpha1.PhaseDone, compv1alpha1.ResultNonCompliant); err != nil {
t.Fatal(err)
}

// verify the compliance check result doesn't exist, which will also
// mean the compliance remediation should also be gone
if err = f.AssertScanDoesNotContainCheck(tpName, checkName, f.OperatorNamespace); err != nil {
t.Fatal(err)
}
if err = f.AssertRemediationDoesNotExists(checkName, f.OperatorNamespace); err != nil {
t.Fatal(err)
}
}
21 changes: 12 additions & 9 deletions tests/e2e/serial/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,15 @@ func TestAutoRemediate(t *testing.T) {
t.Fatal(err)
}

// Fetch remediation here so we can clean up the machine config later.
// We do this before the rescan takes place because the rescan will
// prune the remediation after the check passes.
rem := &compv1alpha1.ComplianceRemediation{}
err = f.Client.Get(context.TODO(), types.NamespacedName{Name: remName, Namespace: f.OperatorNamespace}, rem)
if err != nil {
t.Fatal(err)
}

// We can re-run the scan at this moment and check that it's now compliant
// and it's reflected in a CheckResult
err = f.ReRunScan(scanName, f.OperatorNamespace)
Expand Down Expand Up @@ -436,15 +445,9 @@ func TestAutoRemediate(t *testing.T) {
t.Fatal(err)
}

// The test should not leave junk around, let's remove the MC and wait for the nodes to stabilize
// again
log.Printf("Removing applied remediation\n")
// Fetch remediation here so it can be deleted
rem := &compv1alpha1.ComplianceRemediation{}
err = f.Client.Get(context.TODO(), types.NamespacedName{Name: remName, Namespace: f.OperatorNamespace}, rem)
if err != nil {
t.Fatal(err)
}
// The test should not leave junk around, let's remove the MC and wait
// for the nodes to stabilize again
log.Printf("Removing applied machine config\n")
mcfgToBeDeleted := rem.Spec.Current.Object.DeepCopy()
mcfgToBeDeleted.SetName(rem.GetMcName())
err = f.Client.Delete(context.TODO(), mcfgToBeDeleted)
Expand Down

0 comments on commit f043d19

Please sign in to comment.