diff --git a/collectors/metrics/pkg/forwarder/forwarder.go b/collectors/metrics/pkg/forwarder/forwarder.go index d81e66e4f..679ac683a 100644 --- a/collectors/metrics/pkg/forwarder/forwarder.go +++ b/collectors/metrics/pkg/forwarder/forwarder.go @@ -29,10 +29,12 @@ import ( "github.com/stolostron/multicluster-observability-operator/collectors/metrics/pkg/metricsclient" "github.com/stolostron/multicluster-observability-operator/collectors/metrics/pkg/simulator" "github.com/stolostron/multicluster-observability-operator/collectors/metrics/pkg/status" + statuslib "github.com/stolostron/multicluster-observability-operator/operators/pkg/status" ) const ( failedStatusReportMsg = "Failed to report status" + uwlPromURL = "https://prometheus-user-workload.openshift-user-workload-monitoring.svc:9092" ) type RuleMatcher interface { @@ -98,7 +100,8 @@ type Worker struct { status status.StatusReport - metrics *workerMetrics + metrics *workerMetrics + forwardFailures int } func CreateFromClient(cfg Config, metrics *workerMetrics, interval time.Duration, name string, @@ -297,7 +300,9 @@ func New(cfg Config) (*Worker, error) { } w.recordingRules = recordingRules - s, err := status.New(logger) + standalone := os.Getenv("STANDALONE") == "true" + isUwl := strings.Contains(os.Getenv("FROM"), uwlPromURL) + s, err := status.New(logger, standalone, isUwl) if err != nil { return nil, fmt.Errorf("unable to create StatusReport: %w", err) } @@ -366,6 +371,21 @@ func (w *Worker) forward(ctx context.Context) error { w.lock.Lock() defer w.lock.Unlock() + updateStatus := func(reason statuslib.Reason, message string) { + if reason == statuslib.ForwardFailed { + w.forwardFailures += 1 + if w.forwardFailures < 3 { + return + } + } + + w.forwardFailures = 0 + + if err := w.status.UpdateStatus(ctx, reason, message); err != nil { + rlogger.Log(w.logger, rlogger.Warn, "msg", failedStatusReportMsg, "err", err) + } + } + var families []*clientmodel.MetricFamily var err error if w.simulatedTimeseriesFile 
!= "" { @@ -378,19 +398,13 @@ func (w *Worker) forward(ctx context.Context) error { } else { families, err = w.getFederateMetrics(ctx) if err != nil { - statusErr := w.status.UpdateStatus(ctx, "Degraded", "Failed to retrieve metrics") - if statusErr != nil { - rlogger.Log(w.logger, rlogger.Warn, "msg", failedStatusReportMsg, "err", statusErr) - } + updateStatus(statuslib.ForwardFailed, "Failed to retrieve metrics") return err } rfamilies, err := w.getRecordingMetrics(ctx) if err != nil && len(rfamilies) == 0 { - statusErr := w.status.UpdateStatus(ctx, "Degraded", "Failed to retrieve recording metrics") - if statusErr != nil { - rlogger.Log(w.logger, rlogger.Warn, "msg", failedStatusReportMsg, "err", statusErr) - } + updateStatus(statuslib.ForwardFailed, "Failed to retrieve recording metrics") return err } else { families = append(families, rfamilies...) @@ -399,10 +413,7 @@ func (w *Worker) forward(ctx context.Context) error { before := metricfamily.MetricsCount(families) if err := metricfamily.Filter(families, w.transformer); err != nil { - statusErr := w.status.UpdateStatus(ctx, "Degraded", "Failed to filter metrics") - if statusErr != nil { - rlogger.Log(w.logger, rlogger.Warn, "msg", failedStatusReportMsg, "err", statusErr) - } + updateStatus(statuslib.ForwardFailed, "Failed to filter metrics") return err } @@ -416,34 +427,24 @@ func (w *Worker) forward(ctx context.Context) error { if len(families) == 0 { rlogger.Log(w.logger, rlogger.Warn, "msg", "no metrics to send, doing nothing") - statusErr := w.status.UpdateStatus(ctx, "Available", "No metrics to send") - if statusErr != nil { - rlogger.Log(w.logger, rlogger.Warn, "msg", failedStatusReportMsg, "err", statusErr) - } + updateStatus(statuslib.ForwardSuccessful, "No metrics to send") return nil } if w.to == nil { rlogger.Log(w.logger, rlogger.Warn, "msg", "to is nil, doing nothing") - statusErr := w.status.UpdateStatus(ctx, "Available", "Metrics is not required to send") - if statusErr != nil { - 
rlogger.Log(w.logger, rlogger.Warn, "msg", failedStatusReportMsg, "err", statusErr) - } + updateStatus(statuslib.ForwardSuccessful, "Metrics is not required to send") return nil } req := &http.Request{Method: "POST", URL: w.to} if err := w.toClient.RemoteWrite(ctx, req, families, w.interval); err != nil { - if err := w.status.UpdateStatus(ctx, "Degraded", "Failed to send metrics"); err != nil { - rlogger.Log(w.logger, rlogger.Warn, "msg", failedStatusReportMsg, "err", err) - } + updateStatus(statuslib.ForwardFailed, "Failed to send metrics") return err } if w.simulatedTimeseriesFile == "" { - if err := w.status.UpdateStatus(ctx, "Available", "Cluster metrics sent successfully"); err != nil { - rlogger.Log(w.logger, rlogger.Warn, "msg", failedStatusReportMsg, "err", err) - } + updateStatus(statuslib.ForwardSuccessful, "Cluster metrics sent successfully") } else { rlogger.Log(w.logger, rlogger.Warn, "msg", "Simulated metrics sent successfully") } diff --git a/collectors/metrics/pkg/status/status.go b/collectors/metrics/pkg/status/status.go index ce7142f97..0009bde8b 100644 --- a/collectors/metrics/pkg/status/status.go +++ b/collectors/metrics/pkg/status/status.go @@ -7,39 +7,35 @@ package status import ( "context" "errors" - "fmt" + "log/slog" "os" - "slices" - "sort" - "strings" - "time" "github.com/go-kit/log" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" + "github.com/go-logr/logr" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/clientcmd" - "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" oav1beta1 "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/api/v1beta1" + "github.com/stolostron/multicluster-observability-operator/operators/pkg/status" ) const ( - name = "observability-addon" - namespace = "open-cluster-management-addon-observability" - uwlPromURL = 
"https://prometheus-user-workload.openshift-user-workload-monitoring.svc:9092" + addonName = "observability-addon" + addonNamespace = "open-cluster-management-addon-observability" ) type StatusReport struct { - statusClient client.Client - logger log.Logger + statusClient client.Client + standalone bool + isUwl bool + statusReporter status.Status + logger log.Logger } -func New(logger log.Logger) (*StatusReport, error) { +func New(logger log.Logger, standalone, isUwl bool) (*StatusReport, error) { testMode := os.Getenv("UNIT_TEST") != "" - standaloneMode := os.Getenv("STANDALONE") == "true" var kubeClient client.Client if testMode { s := scheme.Scheme @@ -50,8 +46,6 @@ func New(logger log.Logger) (*StatusReport, error) { WithScheme(s). WithStatusSubresource(&oav1beta1.ObservabilityAddon{}). Build() - } else if standaloneMode { - kubeClient = nil } else { config, err := clientcmd.BuildConfigFromFlags("", "") if err != nil { @@ -67,114 +61,35 @@ func New(logger log.Logger) (*StatusReport, error) { } } + logger.Log("msg", "Creating status client", "standalone", standalone, "isUwl", isUwl) + + statusLogger := logr.FromSlogHandler(slog.New(slog.NewTextHandler(os.Stdout, nil)).With("component", "statusclient").Handler()) return &StatusReport{ - statusClient: kubeClient, - logger: log.With(logger, "component", "statusclient"), + statusClient: kubeClient, + standalone: standalone, + isUwl: isUwl, + statusReporter: status.NewStatus(kubeClient, addonName, addonNamespace, statusLogger), + logger: logger, }, nil } -func (s *StatusReport) UpdateStatus(ctx context.Context, t string, m string) error { - // statusClient is nil when running on the hub. 
- if s.statusClient == nil { - return nil - } - - isUwl := false - if strings.Contains(os.Getenv("FROM"), uwlPromURL) { - isUwl = true - } - - retryErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - addon := &oav1beta1.ObservabilityAddon{} - err := s.statusClient.Get(ctx, types.NamespacedName{ - Name: name, - Namespace: namespace, - }, addon) - if err != nil { - return fmt.Errorf("failed to get ObservabilityAddon %s/%s: %w", namespace, name, err) - } - - // Sort the conditions by rising LastTransitionTime - sort.Slice(addon.Status.Conditions, func(i, j int) bool { - return addon.Status.Conditions[i].LastTransitionTime.Before(&addon.Status.Conditions[j].LastTransitionTime) - }) - - currentCondition := addon.Status.Conditions[len(addon.Status.Conditions)-1] - newCondition := mergeCondtion(isUwl, m, currentCondition) - - // If the current condition is the same, do not update - if currentCondition.Type == newCondition.Type && currentCondition.Reason == newCondition.Reason && currentCondition.Message == newCondition.Message && currentCondition.Status == newCondition.Status { - return nil - } - - s.logger.Log("msg", fmt.Sprintf("Updating status of ObservabilityAddon %s/%s", namespace, name), "type", newCondition.Type, "status", newCondition.Status, "reason", newCondition.Reason) - - // Reset the status of other main conditions - for i := range addon.Status.Conditions { - if slices.Contains([]string{"Available", "Degraded", "Progressing"}, addon.Status.Conditions[i].Type) { - addon.Status.Conditions[i].Status = metav1.ConditionFalse - } - } - - // Set the new condition - addon.Status.Conditions = mutateOrAppend(addon.Status.Conditions, newCondition) - - if err := s.statusClient.Status().Update(ctx, addon); err != nil { - return fmt.Errorf("failed to update ObservabilityAddon %s/%s: %w", namespace, name, err) - } - +func (s *StatusReport) UpdateStatus(ctx context.Context, reason status.Reason, message string) error { + // Standalone mode is set when 
running on the hub cluster + // In this case, we do not need to update the status of the ObservabilityAddon + if s.standalone { return nil - }) - if retryErr != nil { - return retryErr } - return nil -} -func mergeCondtion(isUwl bool, m string, condition oav1beta1.StatusCondition) oav1beta1.StatusCondition { - messages := strings.Split(condition.Message, " ; ") - if len(messages) == 1 { - messages = append(messages, "") - } - if isUwl { - messages[1] = fmt.Sprintf("User Workload: %s", m) - } else { - messages[0] = m + component := status.MetricsCollector + if s.isUwl { + component = status.UwlMetricsCollector } - message := messages[0] - if messages[1] != "" { - message = strings.Join(messages, " ; ") - } - conditionType := "Available" - reason := "Available" - if strings.Contains(message, "Failed") { - conditionType = "Degraded" - reason = "Degraded" - } - return oav1beta1.StatusCondition{ - Type: conditionType, - Status: metav1.ConditionTrue, - Reason: reason, - Message: message, - LastTransitionTime: metav1.NewTime(time.Now()), - } -} -// mutateOrAppend updates the status conditions with the new condition. -// If the condition already exists, it updates it with the new condition. -// If the condition does not exist, it appends the new condition to the status conditions. 
-func mutateOrAppend(conditions []oav1beta1.StatusCondition, newCondition oav1beta1.StatusCondition) []oav1beta1.StatusCondition { - if len(conditions) == 0 { - return []oav1beta1.StatusCondition{newCondition} + if wasReported, err := s.statusReporter.UpdateComponentCondition(ctx, component, reason, message); err != nil { + return err + } else if wasReported { + s.logger.Log("msg", "Status updated", "component", component, "reason", reason, "message", message) } - for i, condition := range conditions { - if condition.Type == newCondition.Type { - // Update the existing condition - conditions[i] = newCondition - return conditions - } - } - // If the condition type does not exist, append the new condition - return append(conditions, newCondition) + return nil } diff --git a/collectors/metrics/pkg/status/status_test.go b/collectors/metrics/pkg/status/status_test.go index 60a22a65b..d4ee78622 100644 --- a/collectors/metrics/pkg/status/status_test.go +++ b/collectors/metrics/pkg/status/status_test.go @@ -11,10 +11,13 @@ import ( "time" "github.com/go-kit/log" + "github.com/stretchr/testify/assert" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes/scheme" oav1beta1 "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/api/v1beta1" + "github.com/stolostron/multicluster-observability-operator/operators/pkg/status" ) func init() { @@ -24,72 +27,126 @@ func init() { } func TestUpdateStatus(t *testing.T) { - s, err := New(log.NewNopLogger()) - if err != nil { - t.Fatalf("Failed to create new Status struct: (%v)", err) - } - addon := &oav1beta1.ObservabilityAddon{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, + testCases := map[string]struct { + reason status.Reason + message string + isUwl bool + initialConditions []oav1beta1.StatusCondition + expectedCondition oav1beta1.StatusCondition + }{ + "new status should be appended": { + reason: 
status.ForwardSuccessful, + message: "Forwarding metrics successful", + initialConditions: []oav1beta1.StatusCondition{}, + expectedCondition: oav1beta1.StatusCondition{ + Type: string(status.MetricsCollector), + Status: metav1.ConditionTrue, + Reason: string(status.ForwardSuccessful), + Message: "Forwarding metrics successful", + LastTransitionTime: metav1.NewTime(time.Now()), + }, }, - Status: oav1beta1.ObservabilityAddonStatus{ - Conditions: []oav1beta1.StatusCondition{ + "existing status should be updated": { + reason: status.ForwardFailed, + message: "Forwarding metrics failed", + initialConditions: []oav1beta1.StatusCondition{ { - Type: "Ready", + Type: string(status.MetricsCollector), Status: metav1.ConditionTrue, - Reason: "Deployed", - Message: "Metrics collector deployed and functional", - LastTransitionTime: metav1.NewTime(time.Now()), + Reason: string(status.ForwardSuccessful), + Message: "Forwarding metrics successful", + LastTransitionTime: metav1.NewTime(time.Now().Add(-3 * time.Minute)), }, }, + expectedCondition: oav1beta1.StatusCondition{ + Type: string(status.MetricsCollector), + Status: metav1.ConditionTrue, + Reason: string(status.ForwardFailed), + Message: "Forwarding metrics failed", + LastTransitionTime: metav1.NewTime(time.Now()), + }, + }, + "same status should not be updated": { + reason: status.ForwardSuccessful, + message: "Forwarding metrics successful", + initialConditions: []oav1beta1.StatusCondition{ + { + Type: string(status.MetricsCollector), + Status: metav1.ConditionTrue, + Reason: string(status.ForwardSuccessful), + Message: "Forwarding metrics successful", + LastTransitionTime: metav1.NewTime(time.Now().Add(-3 * time.Minute)), + }, + }, + expectedCondition: oav1beta1.StatusCondition{ + Type: string(status.MetricsCollector), + Status: metav1.ConditionTrue, + Reason: string(status.ForwardSuccessful), + Message: "Forwarding metrics successful", + LastTransitionTime: metav1.NewTime(time.Now().Add(-3 * time.Minute)), + }, + }, + 
"updateFailed to forward transition should not be allowed": { + reason: status.ForwardSuccessful, + message: "Forwarding metrics successful", + initialConditions: []oav1beta1.StatusCondition{ + { + Type: string(status.MetricsCollector), + Status: metav1.ConditionTrue, + Reason: string(status.UpdateFailed), + Message: "Update failed", + LastTransitionTime: metav1.NewTime(time.Now().Add(-3 * time.Minute)), + }, + }, + expectedCondition: oav1beta1.StatusCondition{ + Type: string(status.MetricsCollector), + Status: metav1.ConditionTrue, + Reason: string(status.UpdateFailed), + Message: "Update failed", + LastTransitionTime: metav1.NewTime(time.Now().Add(-3 * time.Minute)), + }, }, - } - ctx := context.Background() - err = s.statusClient.Create(ctx, addon) - if err != nil { - t.Fatalf("Failed to create observabilityAddon: (%v)", err) } - err = s.UpdateStatus(ctx, "Disabled", "enableMetrics is set to False") - if err != nil { - t.Fatalf("Failed to update status: (%v)", err) - } + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + addon := &oav1beta1.ObservabilityAddon{ + ObjectMeta: metav1.ObjectMeta{ + Name: addonName, + Namespace: addonNamespace, + }, + Status: oav1beta1.ObservabilityAddonStatus{ + Conditions: tc.initialConditions, + }, + } - err = s.UpdateStatus(ctx, "Ready", "Metrics collector deployed and functional") - if err != nil { - t.Fatalf("Failed to update status: (%v)", err) - } + s, err := New(log.NewLogfmtLogger(os.Stdout), false, tc.isUwl) + if err != nil { + t.Fatalf("Failed to create new Status struct: (%v)", err) + } - err = s.UpdateStatus(ctx, "Ready", "Metrics collector deployed and updated") - if err != nil { - t.Fatalf("Failed to update status: (%v)", err) - } + if err := s.statusClient.Create(context.Background(), addon); err != nil { + t.Fatalf("Failed to create observabilityAddon: (%v)", err) + } - err = s.UpdateStatus(ctx, "Available", "Cluster metrics sent successfully") - if err != nil { - t.Fatalf("Failed to update 
status: (%v)", err) - } + s.UpdateStatus(context.Background(), tc.reason, tc.message) - os.Setenv("FROM", uwlPromURL) - err = s.UpdateStatus(ctx, "Degraded", "Failed to retrieve metrics") - if err != nil { - t.Fatalf("Failed to update status: (%v)", err) - } + foundAddon := &oav1beta1.ObservabilityAddon{} + if err := s.statusClient.Get(context.Background(), types.NamespacedName{Name: addonName, Namespace: addonNamespace}, foundAddon); err != nil { + t.Fatalf("Failed to get observabilityAddon: (%v)", err) + } - err = s.UpdateStatus(ctx, "Degraded", "Failed to send metrics") - if err != nil { - t.Fatalf("Failed to update status: (%v)", err) - } + if len(foundAddon.Status.Conditions) == 0 { + t.Fatalf("No conditions found in observabilityAddon") + } - err = s.UpdateStatus(ctx, "Available", "Cluster metrics sent successfully") - if err != nil { - t.Fatalf("Failed to update status: (%v)", err) - } - os.Setenv("FROM", "") - err = s.UpdateStatus(ctx, "Available", "Cluster metrics sent successfully") - if err != nil { - t.Fatalf("Failed to update status: (%v)", err) + condition := foundAddon.Status.Conditions[0] + assert.Equal(t, tc.expectedCondition.Type, condition.Type) + assert.Equal(t, tc.expectedCondition.Status, condition.Status) + assert.Equal(t, tc.expectedCondition.Reason, condition.Reason) + assert.Equal(t, tc.expectedCondition.Message, condition.Message) + assert.InEpsilon(t, tc.expectedCondition.LastTransitionTime.Unix(), condition.LastTransitionTime.Unix(), 1) + }) } } diff --git a/operators/endpointmetrics/controllers/observabilityendpoint/observabilityaddon_controller.go b/operators/endpointmetrics/controllers/observabilityendpoint/observabilityaddon_controller.go index 1eef31242..e5a5137fb 100644 --- a/operators/endpointmetrics/controllers/observabilityendpoint/observabilityaddon_controller.go +++ b/operators/endpointmetrics/controllers/observabilityendpoint/observabilityaddon_controller.go @@ -32,13 +32,13 @@ import ( 
"github.com/stolostron/multicluster-observability-operator/operators/endpointmetrics/pkg/hypershift" "github.com/stolostron/multicluster-observability-operator/operators/endpointmetrics/pkg/openshift" "github.com/stolostron/multicluster-observability-operator/operators/endpointmetrics/pkg/rendering" - "github.com/stolostron/multicluster-observability-operator/operators/endpointmetrics/pkg/status" "github.com/stolostron/multicluster-observability-operator/operators/endpointmetrics/pkg/util" oav1beta1 "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/api/v1beta1" oav1beta2 "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/api/v1beta2" operatorconfig "github.com/stolostron/multicluster-observability-operator/operators/pkg/config" "github.com/stolostron/multicluster-observability-operator/operators/pkg/deploying" rendererutil "github.com/stolostron/multicluster-observability-operator/operators/pkg/rendering" + "github.com/stolostron/multicluster-observability-operator/operators/pkg/status" ) var ( @@ -192,8 +192,11 @@ func (r *ObservabilityAddonReconciler) Reconcile(ctx context.Context, req ctrl.R // ACM 8509: Special case for hub/local cluster metrics collection // We do not report status for hub endpoint operator if !r.IsHubMetricsCollector { - if err := status.ReportStatus(ctx, r.Client, status.NotSupported, obsAddon.Name, obsAddon.Namespace); err != nil { + statusReporter := status.NewStatus(r.Client, obsAddon.Name, obsAddon.Namespace, log) + if wasReported, err := statusReporter.UpdateComponentCondition(ctx, status.MetricsCollector, status.NotSupported, "Prometheus service not found"); err != nil { log.Error(err, "Failed to report status") + } else if wasReported { + log.Info("Status updated", "component", status.MetricsCollector, "reason", status.NotSupported) } } diff --git a/operators/endpointmetrics/controllers/status/status_controller.go 
b/operators/endpointmetrics/controllers/status/status_controller.go index 77716622f..73c827ba7 100644 --- a/operators/endpointmetrics/controllers/status/status_controller.go +++ b/operators/endpointmetrics/controllers/status/status_controller.go @@ -9,9 +9,12 @@ import ( "fmt" "net" "reflect" + "sort" "time" + "github.com/go-logr/logr" "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/util/retry" ctrl "sigs.k8s.io/controller-runtime" @@ -19,12 +22,69 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" - "github.com/go-logr/logr" "github.com/stolostron/multicluster-observability-operator/operators/endpointmetrics/pkg/util" oav1beta1 "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/api/v1beta1" + "github.com/stolostron/multicluster-observability-operator/operators/pkg/status" ) +// conditionType represents the standard conditions expected by ACM in the ObservabilityAddon status. +type conditionType string + +const ( + Available conditionType = "Available" + Progressing conditionType = "Progressing" + Degraded conditionType = "Degraded" +) + +var ( + // componentsMap contains the types of conditions (from individual components) that must be aggregated into standard conditions. + componentsMap = map[string]struct{}{ + string(status.MetricsCollector): {}, + string(status.UwlMetricsCollector): {}, + } +) + +// reason maps individual component reasons to standard types and assigns a priority to each reason. +// The priority is used to aggregate the conditions of the components into a single condition. 
+type reason struct { + reason string + priority int + stdType conditionType +} + +func newReason(s string) reason { + switch s { + case string(status.Disabled): + return reason{string(status.Disabled), 1, Degraded} + case string(status.ForwardSuccessful): + return reason{string(status.ForwardSuccessful), 2, Available} + case string(status.UpdateSuccessful): + return reason{string(status.UpdateSuccessful), 3, Progressing} + case string(status.ForwardFailed): + return reason{string(status.ForwardFailed), 4, Degraded} + case string(status.UpdateFailed): + return reason{string(status.UpdateFailed), 5, Degraded} + case string(status.NotSupported): + return reason{string(status.NotSupported), 6, Degraded} + default: + return reason{s, -1, Degraded} + } +} + +func (r reason) String() string { + return string(r.reason) +} + +func (r reason) Priority() int { + return r.priority +} + +func (r reason) StdType() conditionType { + return r.stdType +} + // StatusReconciler reconciles status object. type StatusReconciler struct { Client client.Client @@ -35,33 +95,85 @@ type StatusReconciler struct { Logger logr.Logger } -// Reconcile reads that state of the cluster for a ObservabilityAddon object and makes changes based on the state read -// and what is in the ObservabilityAddon.Status -// The Controller will requeue the Request to be processed again if the returned error is non-nil or -// Result.Requeue is true, otherwise upon completion it will remove the work from the queue. +// Reconcile reads the status' conditions of ObservabilityAddon, aggregates the individual component conditions +// into standard conditions, and updates the status in the local and hub clusters. 
+// It returns: +// - a TerminalError if the reconciliation fails and no requeue is needed +// - a non terminal error if the reconciliation fails and a requeue is needed +// - a result.RequeueAfter if the reconciliation fails and a requeue with delay is needed func (r *StatusReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { r.Logger.WithValues("Request", req.String()).Info("Reconciling") + if res, err := r.updateSpokeAddon(ctx); err != nil { + return res, err + } else if !res.IsZero() { + return res, nil + } + + if res, err := r.updateHubAddon(ctx); err != nil { + return res, err + } else if !res.IsZero() { + return res, nil + } + + return ctrl.Result{}, nil +} + +func (s *StatusReconciler) updateSpokeAddon(ctx context.Context) (ctrl.Result, error) { + retryErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + // Fetch the ObservabilityAddon instance in local cluster + obsAddon := &oav1beta1.ObservabilityAddon{} + if err := s.Client.Get(ctx, types.NamespacedName{Name: s.ObsAddonName, Namespace: s.Namespace}, obsAddon); err != nil { + return err + } + + addonNewCondition := aggregateComponentsConditions(obsAddon.Status.Conditions) + if addonNewCondition == nil { + return nil + } + + if !shouldUpdateConditions(obsAddon.Status.Conditions, *addonNewCondition) { + return nil + } + + obsAddon.Status.Conditions = resetMainConditionsStatus(obsAddon.Status.Conditions) + obsAddon.Status.Conditions = mutateOrAppend(obsAddon.Status.Conditions, *addonNewCondition) + + s.Logger.Info(fmt.Sprintf("Updating status of ObservabilityAddon %s/%s", obsAddon.Namespace, obsAddon.Name), "type", addonNewCondition.Type, "reason", addonNewCondition.Reason) + + return s.Client.Status().Update(ctx, obsAddon) + }) + + if retryErr != nil { + if errors.IsConflict(retryErr) || util.IsTransientClientErr(retryErr) { + return s.requeueWithOptionalDelay(fmt.Errorf("failed to update status in spoke cluster with retryable error: %w", retryErr)) + } + return 
ctrl.Result{}, reconcile.TerminalError(retryErr) + } + + return ctrl.Result{}, nil +} + +func (s *StatusReconciler) updateHubAddon(ctx context.Context) (ctrl.Result, error) { // Fetch the ObservabilityAddon instance in hub cluster hubObsAddon := &oav1beta1.ObservabilityAddon{} - err := r.HubClient.Get(ctx, types.NamespacedName{Name: r.ObsAddonName, Namespace: r.HubNamespace}, hubObsAddon) + err := s.HubClient.Get(ctx, types.NamespacedName{Name: s.ObsAddonName, Namespace: s.HubNamespace}, hubObsAddon) if err != nil { if isAuthOrConnectionErr(err) { // Try reloading the kubeconfig for the hub cluster var reloadErr error - if r.HubClient, reloadErr = r.HubClient.Reload(); reloadErr != nil { - return ctrl.Result{}, fmt.Errorf("failed to reload the hub client: %w", reloadErr) + if s.HubClient, reloadErr = s.HubClient.Reload(); reloadErr != nil { + return ctrl.Result{}, reconcile.TerminalError(fmt.Errorf("failed to reload the hub client: %w", reloadErr)) } - r.Logger.Info("Failed to get ObservabilityAddon in hub cluster, reloaded hub, requeue with delay", "error", err) - return ctrl.Result{Requeue: true}, nil + return ctrl.Result{}, fmt.Errorf("failed to get ObservabilityAddon in hub cluster, reloaded hub client: %w", err) } if util.IsTransientClientErr(err) { - r.Logger.Info("Failed to get ObservabilityAddon in hub cluster, requeue with delay", "error", err) - return requeueWithOptionalDelay(err), nil + s.Logger.Info("Failed to get ObservabilityAddon in hub cluster, requeue with delay", "error", err) + return s.requeueWithOptionalDelay(err) } - return ctrl.Result{}, err + return ctrl.Result{}, reconcile.TerminalError(fmt.Errorf("failed to get ObservabilityAddon in hub cluster: %w", err)) } // Retry on conflict as operation happens in other cluster @@ -69,7 +181,7 @@ func (r *StatusReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr retryErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { // Fetch the ObservabilityAddon instance in local 
cluster obsAddon := &oav1beta1.ObservabilityAddon{} - if err != r.Client.Get(ctx, types.NamespacedName{Name: r.ObsAddonName, Namespace: r.Namespace}, obsAddon) { + if err := s.Client.Get(ctx, types.NamespacedName{Name: s.ObsAddonName, Namespace: s.Namespace}, obsAddon); err != nil { return err } @@ -82,33 +194,56 @@ func (r *StatusReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr updatedAddon.Status = obsAddon.Status // Update the status in hub cluster - return r.HubClient.Status().Update(ctx, updatedAddon) + return s.HubClient.Status().Update(ctx, updatedAddon) }) if retryErr != nil { if util.IsTransientClientErr(retryErr) || errors.IsConflict(retryErr) { - r.Logger.Info("Retryable error while updating status, request will be retried.", "error", retryErr) - return requeueWithOptionalDelay(retryErr), nil + s.Logger.Info("Retryable error while updating status, request will be retried.", "error", retryErr) + return s.requeueWithOptionalDelay(retryErr) } - return ctrl.Result{}, fmt.Errorf("failed to update status in hub cluster: %w", retryErr) + return ctrl.Result{}, reconcile.TerminalError(fmt.Errorf("failed to update status in hub cluster: %w", retryErr)) } return ctrl.Result{}, nil } +// requeueWithOptionalDelay requeues the request with a delay if suggested by the error +// Otherwise, it requeues the request without a delay by returning an error +// The runtime will requeue the request without a delay if the error is non-nil +func (r *StatusReconciler) requeueWithOptionalDelay(err error) (ctrl.Result, error) { + if delay, ok := errors.SuggestsClientDelay(err); ok { + r.Logger.Info("Requeue with delay", "error", err, "delay", delay) + return ctrl.Result{RequeueAfter: time.Duration(delay) * time.Second}, nil + } + + return ctrl.Result{}, err +} + // SetupWithManager sets up the controller with the Manager. 
func (r *StatusReconciler) SetupWithManager(mgr ctrl.Manager) error { + filterOutStandardConditions := func(c []oav1beta1.StatusCondition) []oav1beta1.StatusCondition { + var filtered []oav1beta1.StatusCondition + for _, condition := range c { + if condition.Type == "Available" || condition.Type == "Progressing" || condition.Type == "Degraded" { + continue + } + filtered = append(filtered, condition) + } + return filtered + } pred := predicate.Funcs{ CreateFunc: func(e event.CreateEvent) bool { return false }, UpdateFunc: func(e event.UpdateEvent) bool { - if e.ObjectNew.GetNamespace() == r.Namespace && - !reflect.DeepEqual(e.ObjectNew.(*oav1beta1.ObservabilityAddon).Status, - e.ObjectOld.(*oav1beta1.ObservabilityAddon).Status) { - return true + if e.ObjectNew.GetNamespace() != r.Namespace { + return false } - return false + + newConditions := filterOutStandardConditions(e.ObjectNew.(*oav1beta1.ObservabilityAddon).Status.Conditions) + oldConditions := filterOutStandardConditions(e.ObjectOld.(*oav1beta1.ObservabilityAddon).Status.Conditions) + return !reflect.DeepEqual(newConditions, oldConditions) }, DeleteFunc: func(e event.DeleteEvent) bool { return false @@ -134,12 +269,119 @@ func isAuthOrConnectionErr(err error) bool { return false } -// requeueWithOptionalDelay requeues the request with a delay if suggested by the error -// Otherwise, it requeues the request without a delay -func requeueWithOptionalDelay(err error) ctrl.Result { - if delay, ok := errors.SuggestsClientDelay(err); ok { - return ctrl.Result{RequeueAfter: time.Duration(delay) * time.Second} +// mutateOrAppend updates the status conditions with the new condition. +// If the condition already exists, it updates it with the new condition. +// If the condition does not exist, it appends the new condition to the status conditions. 
+func mutateOrAppend(conditions []oav1beta1.StatusCondition, newCondition oav1beta1.StatusCondition) []oav1beta1.StatusCondition { + if len(conditions) == 0 { + return []oav1beta1.StatusCondition{newCondition} + } + + for i, condition := range conditions { + if condition.Type == newCondition.Type { + // Update the existing condition + conditions[i] = newCondition + return conditions + } + } + // If the condition type does not exist, append the new condition + return append(conditions, newCondition) +} + +// shouldAppendCondition checks if the new condition should be appended to the status conditions +// based on the last condition in the slice. +func shouldUpdateConditions(conditions []oav1beta1.StatusCondition, newCondition oav1beta1.StatusCondition) bool { + filteredConditions := []oav1beta1.StatusCondition{} + validTypes := map[string]struct{}{ + string(Available): {}, + string(Progressing): {}, + string(Degraded): {}, + } + for _, condition := range conditions { + if _, ok := validTypes[condition.Type]; ok { + filteredConditions = append(filteredConditions, condition) + } + } + + if len(filteredConditions) == 0 { + return true } - return ctrl.Result{Requeue: true} + sort.Slice(filteredConditions, func(i, j int) bool { + if filteredConditions[i].Status == metav1.ConditionFalse && filteredConditions[j].Status == metav1.ConditionTrue { + return true + } + return filteredConditions[i].LastTransitionTime.Before(&filteredConditions[j].LastTransitionTime) + }) + + lastCondition := filteredConditions[len(filteredConditions)-1] + + return lastCondition.Type != newCondition.Type || + lastCondition.Status != newCondition.Status || + lastCondition.Reason != newCondition.Reason || + lastCondition.Message != newCondition.Message +} + +// aggregateComponentsConditions aggregates the conditions of the components into a single condition +// the condition type and reason are set based on the priority of the reasons of the components +// the m +func 
aggregateComponentsConditions(conditions []oav1beta1.StatusCondition) *oav1beta1.StatusCondition { + // Filter out standard conditions + filteredConditions := []oav1beta1.StatusCondition{} + for _, condition := range conditions { + if _, ok := componentsMap[condition.Type]; ok { + filteredConditions = append(filteredConditions, condition) + } + } + + if len(filteredConditions) == 0 { + return nil + } + + // Sort the conditions by decreasing priority of the reason + // If same priority, order by the type of the condition + sort.Slice(filteredConditions, func(i, j int) bool { + if newReason(filteredConditions[i].Reason).Priority() == newReason(filteredConditions[j].Reason).Priority() { + return filteredConditions[i].Type < filteredConditions[j].Type + } + return newReason(filteredConditions[i].Reason).Priority() > newReason(filteredConditions[j].Reason).Priority() + }) + + // Aggregate the conditions based on the priority of the reason + aggregatedCondition := &oav1beta1.StatusCondition{ + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.Now(), + Reason: filteredConditions[0].Reason, + Type: string(newReason(filteredConditions[0].Reason).StdType()), + Message: fmt.Sprintf("%s: %s", filteredConditions[0].Type, filteredConditions[0].Message), + } + + // Set some standard messages for the aggregated condition. It aligns with the registration-agent available message (see below) + if aggregatedCondition.Type == string(Available) { + // If the aggregated condition is Available, override the message with the same message as the registration-agent + // It avoids confusion for the user. Because at some point, the registration-agent overrides the "Available" condition + // with its own message. + aggregatedCondition.Message = "observability-controller add-on is available." + } else if aggregatedCondition.Type == string(Progressing) { + aggregatedCondition.Message = "observability-controller add-on is progressing." 
+ } else if aggregatedCondition.Type == string(Degraded) && aggregatedCondition.Reason == string(status.Disabled) { + aggregatedCondition.Message = "observability-controller add-on is disabled." + } + + // truncate the message if it exceeds the limit + limit := 256 + if len(aggregatedCondition.Message) > limit { + aggregatedCondition.Message = aggregatedCondition.Message[:limit-3] + "..." + } + + return aggregatedCondition +} + +func resetMainConditionsStatus(conditions []oav1beta1.StatusCondition) []oav1beta1.StatusCondition { + for i := range conditions { + if conditions[i].Type == string(Available) || conditions[i].Type == string(Degraded) || conditions[i].Type == string(Progressing) { + conditions[i].Status = metav1.ConditionFalse + } + } + return conditions } diff --git a/operators/endpointmetrics/controllers/status/status_controller_test.go b/operators/endpointmetrics/controllers/status/status_controller_test.go index 0c968b428..3ae9af4b6 100644 --- a/operators/endpointmetrics/controllers/status/status_controller_test.go +++ b/operators/endpointmetrics/controllers/status/status_controller_test.go @@ -6,23 +6,27 @@ package status_test import ( "context" + "errors" "fmt" "net" "reflect" + "sort" "testing" + "time" - "k8s.io/apimachinery/pkg/api/errors" + "github.com/go-logr/logr" + "github.com/stretchr/testify/assert" + apiErrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes/scheme" + addonv1alpha1 "open-cluster-management.io/api/addon/v1alpha1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - - "github.com/go-logr/logr" - addonv1alpha1 "open-cluster-management.io/api/addon/v1alpha1" + "sigs.k8s.io/controller-runtime/pkg/reconcile" 
"github.com/stolostron/multicluster-observability-operator/operators/endpointmetrics/controllers/status" "github.com/stolostron/multicluster-observability-operator/operators/endpointmetrics/pkg/util" @@ -30,28 +34,35 @@ import ( oav1beta1 "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/api/v1beta1" ) -const ( - name = "observability-addon" - testNamespace = "test-ns" - testHubNamespace = "test-hub-ns" - obAddonName = "observability-addon" -) - -func TestStatusController_NominalCase(t *testing.T) { - spokeOba := newObservabilityAddon(name, testNamespace) - c := newClient(spokeOba) +func TestStatusController_HubNominalCase(t *testing.T) { + addonName := "observability-addon" + addonNamespace := "test-ns" + spokeOba := newObservabilityAddon(addonName, addonNamespace) + spokeClient := newClient(spokeOba) - hubOba := newObservabilityAddon(name, testHubNamespace) + addonHubNamespace := "test-ns" + hubOba := newObservabilityAddon(addonName, addonHubNamespace) hubOba.Spec.Interval = 12341 // add variation in the spec, not status custumHubClient := newClientWithUpdateError(newClient(hubOba), nil, nil) - r := newStatusReconciler(c, func() (client.Client, error) { return custumHubClient, nil }) + reloadableHubClient, err := util.NewReloadableHubClientWithReloadFunc(func() (client.Client, error) { return custumHubClient, nil }) + if err != nil { + t.Fatalf("Failed to create reloadable hub client: %v", err) + } + statusReconciler := &status.StatusReconciler{ + Client: spokeClient, + HubClient: reloadableHubClient, + Namespace: addonNamespace, + HubNamespace: addonHubNamespace, + ObsAddonName: addonName, + Logger: logr.Discard(), + } // no status difference triggers no update - resp, err := r.Reconcile(context.Background(), newRequest()) + resp, err := statusReconciler.Reconcile(context.Background(), ctrl.Request{}) if err != nil { t.Fatalf("Failed to reconcile: %v", err) } - if !reflect.DeepEqual(resp, ctrl.Result{}) { + if 
!resp.IsZero() { t.Fatalf("Expected no requeue") } if custumHubClient.UpdateCallsCount() > 0 { @@ -59,19 +70,21 @@ func TestStatusController_NominalCase(t *testing.T) { } // update status in spoke - addCondition(spokeOba, "Deployed", metav1.ConditionTrue) - err = c.Status().Update(context.Background(), spokeOba) + spokeOba.Status.Conditions = append(spokeOba.Status.Conditions, oav1beta1.StatusCondition{ + Type: "Available", + }) + err = spokeClient.Status().Update(context.Background(), spokeOba) if err != nil { t.Fatalf("Failed to update status in spoke: %v", err) } // status difference should trigger update in hub - resp, err = r.Reconcile(context.Background(), newRequest()) + resp, err = statusReconciler.Reconcile(context.Background(), ctrl.Request{}) if err != nil { t.Fatalf("Failed to reconcile: %v", err) } - if !reflect.DeepEqual(resp, ctrl.Result{}) { + if !resp.IsZero() { t.Fatalf("Expected no requeue") } if custumHubClient.UpdateCallsCount() != 1 { @@ -80,7 +93,7 @@ func TestStatusController_NominalCase(t *testing.T) { // check status in hub hubObsAddon := &oav1beta1.ObservabilityAddon{} - err = custumHubClient.Get(context.Background(), types.NamespacedName{Name: obAddonName, Namespace: testHubNamespace}, hubObsAddon) + err = custumHubClient.Get(context.Background(), types.NamespacedName{Name: addonName, Namespace: addonHubNamespace}, hubObsAddon) if err != nil { t.Fatalf("Failed to get oba in hub: %v", err) } @@ -90,48 +103,63 @@ func TestStatusController_NominalCase(t *testing.T) { } func TestStatusController_UpdateHubAddonFailures(t *testing.T) { - spokeOba := newObservabilityAddon(name, testNamespace) - addCondition(spokeOba, "Deployed", metav1.ConditionTrue) // add status to trigger update - c := newClient(spokeOba) + addonName := "observability-addon" + addonNamespace := "test-ns" + spokeOba := newObservabilityAddon(addonName, addonNamespace) + // add status to trigger update + spokeOba.Status.Conditions = append(spokeOba.Status.Conditions, 
oav1beta1.StatusCondition{ + Type: "Available", + }) + spokeClient := newClient(spokeOba) - hubOba := newObservabilityAddon(name, testHubNamespace) + addonHubNamespace := "test-ns" + hubOba := newObservabilityAddon(addonName, addonHubNamespace) var updateErr error hubClientWithConflict := newClientWithUpdateError(newClient(hubOba), updateErr, nil) - r := newStatusReconciler(c, func() (client.Client, error) { return hubClientWithConflict, nil }) + reloadableHubClient, err := util.NewReloadableHubClientWithReloadFunc(func() (client.Client, error) { return hubClientWithConflict, nil }) + if err != nil { + t.Fatalf("Failed to create reloadable hub client: %v", err) + } + statusReconciler := &status.StatusReconciler{ + Client: spokeClient, + HubClient: reloadableHubClient, + Namespace: addonNamespace, + HubNamespace: addonHubNamespace, + ObsAddonName: addonName, + Logger: logr.Discard(), + } testCases := map[string]struct { updateErr error - reconcileErr error + terminalErr bool requeue bool - requeueAfter bool requeueAfterVal int updateCallsMin int updateCallsMax int }{ "Conflict": { - updateErr: errors.NewConflict(schema.GroupResource{Group: oav1beta1.GroupVersion.Group, Resource: "FakeResource"}, name, fmt.Errorf("fake conflict")), + updateErr: apiErrors.NewConflict(schema.GroupResource{Group: oav1beta1.GroupVersion.Group, Resource: "FakeResource"}, addonName, fmt.Errorf("fake conflict")), requeue: true, updateCallsMin: 1, }, "Server unavailable": { - updateErr: errors.NewServiceUnavailable("service unavailable"), + updateErr: apiErrors.NewServiceUnavailable("service unavailable"), requeue: true, updateCallsMax: 1, }, "internal error": { - updateErr: errors.NewInternalError(fmt.Errorf("internal error")), - // reconcileErr: errors.NewInternalError(fmt.Errorf("fake internal error")), + updateErr: apiErrors.NewInternalError(fmt.Errorf("internal error")), updateCallsMax: 1, requeue: true, }, "Permanent error": { - updateErr: errors.NewBadRequest("bad request"), - 
reconcileErr: errors.NewBadRequest("bad request"), + updateErr: apiErrors.NewBadRequest("bad request"), + terminalErr: true, updateCallsMax: 1, }, "Too many requests": { - updateErr: errors.NewTooManyRequests("too many requests", 10), - requeueAfter: true, + updateErr: apiErrors.NewTooManyRequests("too many requests", 10), + requeue: true, requeueAfterVal: 10, updateCallsMax: 1, }, @@ -148,16 +176,17 @@ func TestStatusController_UpdateHubAddonFailures(t *testing.T) { t.Run(name, func(t *testing.T) { hubClientWithConflict.UpdateError = tc.updateErr hubClientWithConflict.Reset() - resp, err := r.Reconcile(context.Background(), newRequest()) - if (tc.reconcileErr != nil && err == nil) || (tc.reconcileErr == nil && err != nil) { - t.Fatalf("Invalid reconcile error: got %v, expected %v", err, tc.reconcileErr) - } - if tc.requeue != resp.Requeue { - t.Fatalf("Invalid requeue: got %v, expected %v", resp.Requeue, tc.requeue) + resp, err := statusReconciler.Reconcile(context.Background(), ctrl.Request{}) + isTerminalErr := errors.Is(err, reconcile.TerminalError(nil)) + if tc.terminalErr != isTerminalErr { + t.Fatalf("Invalid reconcile error: got %v, expected %v", err, tc.terminalErr) } - if tc.requeueAfter != (resp.RequeueAfter > 0) { - t.Fatalf("Invalid requeue after: got %v, expected %v", resp.RequeueAfter > 0, tc.requeueAfter) + + isRequeued := (!resp.IsZero() && err == nil) || (err != nil && !errors.Is(err, reconcile.TerminalError(nil))) + if tc.requeue != isRequeued { + t.Fatalf("Expected requeue") } + if tc.requeueAfterVal > 0 && int(resp.RequeueAfter.Seconds()) != tc.requeueAfterVal { t.Fatalf("Invalid requeue after value: got %v, expected %v", int(resp.RequeueAfter.Seconds()), tc.requeueAfterVal) } @@ -172,42 +201,59 @@ func TestStatusController_UpdateHubAddonFailures(t *testing.T) { } func TestStatusController_GetHubAddonFailures(t *testing.T) { - spokeOba := newObservabilityAddon(name, testNamespace) - addCondition(spokeOba, "Deployed", metav1.ConditionTrue) // 
add status to trigger update - c := newClient(spokeOba) + addonName := "observability-addon" + addonNamespace := "test-ns" + spokeOba := newObservabilityAddon(addonName, addonNamespace) + // add status to trigger update + spokeOba.Status.Conditions = append(spokeOba.Status.Conditions, oav1beta1.StatusCondition{ + Type: "Available", + }) + spokeClient := newClient(spokeOba) - hubOba := newObservabilityAddon(name, testHubNamespace) + addonHubNamespace := "test-ns" + hubOba := newObservabilityAddon(addonName, addonHubNamespace) hubClientWithConflict := newClientWithUpdateError(newClient(hubOba), nil, nil) + var reloadCount int - r := newStatusReconciler(c, func() (client.Client, error) { + reloadableHubClient, err := util.NewReloadableHubClientWithReloadFunc(func() (client.Client, error) { reloadCount++ return hubClientWithConflict, nil }) + if err != nil { + t.Fatalf("Failed to create reloadable hub client: %v", err) + } + statusReconciler := &status.StatusReconciler{ + Client: spokeClient, + HubClient: reloadableHubClient, + Namespace: addonNamespace, + HubNamespace: addonHubNamespace, + ObsAddonName: addonName, + Logger: logr.Discard(), + } testCases := map[string]struct { getErr error - reconcileErr error + terminalErr bool requeue bool - requeueAfter bool requeueAfterVal int reloadCount int }{ "Unauthorized": { - getErr: errors.NewUnauthorized("unauthorized"), + getErr: apiErrors.NewUnauthorized("unauthorized"), requeue: true, reloadCount: 1, }, "Permanent error": { - getErr: errors.NewBadRequest("bad request"), - reconcileErr: errors.NewBadRequest("bad request"), + getErr: apiErrors.NewBadRequest("bad request"), + terminalErr: true, }, "Servers unavailable": { - getErr: errors.NewServiceUnavailable("service unavailable"), + getErr: apiErrors.NewServiceUnavailable("service unavailable"), requeue: true, }, "Too many requests": { - getErr: errors.NewTooManyRequests("too many requests", 10), - requeueAfter: true, + getErr: apiErrors.NewTooManyRequests("too many 
requests", 10), + requeue: true, requeueAfterVal: 10, }, } @@ -216,16 +262,14 @@ func TestStatusController_GetHubAddonFailures(t *testing.T) { t.Run(name, func(t *testing.T) { hubClientWithConflict.GetError = tc.getErr reloadCount = 0 - // hubClientWithConflict.Reset() - resp, err := r.Reconcile(context.Background(), newRequest()) - if (tc.reconcileErr != nil && err == nil) || (tc.reconcileErr == nil && err != nil) { - t.Fatalf("Invalid reconcile error: got %v, expected %v", err, tc.reconcileErr) + resp, err := statusReconciler.Reconcile(context.Background(), ctrl.Request{}) + isTerminalErr := errors.Is(err, reconcile.TerminalError(nil)) + if tc.terminalErr != isTerminalErr { + t.Fatalf("Invalid reconcile error: got %v, expected %v", err, tc.terminalErr) } - if tc.requeue != resp.Requeue { - t.Fatalf("Invalid requeue: got %v, expected %v", resp.Requeue, tc.requeue) - } - if tc.requeueAfter != (resp.RequeueAfter > 0) { - t.Fatalf("Invalid requeue after: got %v, expected %v", resp.RequeueAfter > 0, tc.requeueAfter) + isRequeued := (!resp.IsZero() && err == nil) || (err != nil && !errors.Is(err, reconcile.TerminalError(nil))) + if tc.requeue != isRequeued { + t.Fatalf("Expected requeue") } if tc.requeueAfterVal > 0 && int(resp.RequeueAfter.Seconds()) != tc.requeueAfterVal { t.Fatalf("Invalid requeue after value: got %v, expected %v", int(resp.RequeueAfter.Seconds()), tc.requeueAfterVal) @@ -237,6 +281,165 @@ func TestStatusController_GetHubAddonFailures(t *testing.T) { } } +func TestStatusController_UpdateSpokeAddon(t *testing.T) { + addonName := "observability-addon" + addonNamespace := "test-ns" + + addonHubNamespace := "test-ns" + hubOba := newObservabilityAddon(addonName, addonHubNamespace) + var updateErr error + hubClientWithConflict := newClientWithUpdateError(newClient(hubOba), updateErr, nil) + reloadableHubClient, err := util.NewReloadableHubClientWithReloadFunc(func() (client.Client, error) { return hubClientWithConflict, nil }) + if err != nil { + 
t.Fatalf("Failed to create reloadable hub client: %v", err) + } + availableMsg := "observability-controller add-on is available." + + newCondition := func(t, r, m string, status metav1.ConditionStatus, lastTransitionTime time.Time) oav1beta1.StatusCondition { + return oav1beta1.StatusCondition{ + Type: t, + Reason: r, + Message: m, + Status: status, + LastTransitionTime: metav1.NewTime(lastTransitionTime), + } + } + + testCases := map[string]struct { + spokeAddonConditions []oav1beta1.StatusCondition + expectConditions []oav1beta1.StatusCondition + }{ + "no condition": { + spokeAddonConditions: []oav1beta1.StatusCondition{}, + expectConditions: []oav1beta1.StatusCondition{}, + }, + "no component condition": { + spokeAddonConditions: []oav1beta1.StatusCondition{ + newCondition("Available", "ForwardSuccessful", "MetricsCollector: Metrics sent", metav1.ConditionTrue, time.Now().Add(-time.Minute)), + }, + expectConditions: []oav1beta1.StatusCondition{ + newCondition("Available", "ForwardSuccessful", "MetricsCollector: Metrics sent", metav1.ConditionTrue, time.Now().Add(-time.Minute)), + }, + }, + "single component aggregation": { + spokeAddonConditions: []oav1beta1.StatusCondition{ + newCondition("MetricsCollector", "ForwardSuccessful", "Metrics sent", metav1.ConditionTrue, time.Now()), + }, + expectConditions: []oav1beta1.StatusCondition{ + newCondition("MetricsCollector", "ForwardSuccessful", "Metrics sent", metav1.ConditionTrue, time.Now()), + newCondition("Available", "ForwardSuccessful", availableMsg, metav1.ConditionTrue, time.Now()), + }, + }, + "multi aggregation with same reason": { + spokeAddonConditions: []oav1beta1.StatusCondition{ + newCondition("MetricsCollector", "ForwardSuccessful", "Metrics sent", metav1.ConditionTrue, time.Now()), + newCondition("UwlMetricsCollector", "ForwardSuccessful", "Metrics sent", metav1.ConditionTrue, time.Now()), + }, + expectConditions: []oav1beta1.StatusCondition{ + newCondition("MetricsCollector", "ForwardSuccessful", 
"Metrics sent", metav1.ConditionTrue, time.Now()), + newCondition("UwlMetricsCollector", "ForwardSuccessful", "Metrics sent", metav1.ConditionTrue, time.Now()), + newCondition("Available", "ForwardSuccessful", availableMsg, metav1.ConditionTrue, time.Now()), + }, + }, + "multi aggregation with highest priority reason": { + spokeAddonConditions: []oav1beta1.StatusCondition{ + newCondition("MetricsCollector", "ForwardSuccessful", "Metrics sent", metav1.ConditionTrue, time.Now()), + newCondition("UwlMetricsCollector", "ForwardFailed", "Metrics failed", metav1.ConditionTrue, time.Now()), + }, + expectConditions: []oav1beta1.StatusCondition{ + newCondition("MetricsCollector", "ForwardSuccessful", "Metrics sent", metav1.ConditionTrue, time.Now()), + newCondition("UwlMetricsCollector", "ForwardFailed", "Metrics failed", metav1.ConditionTrue, time.Now()), + newCondition("Degraded", "ForwardFailed", "UwlMetricsCollector: Metrics failed", metav1.ConditionTrue, time.Now()), + }, + }, + "conditions are not updated if they are the same": { + spokeAddonConditions: []oav1beta1.StatusCondition{ + newCondition("MetricsCollector", "ForwardSuccessful", "Metrics sent", metav1.ConditionTrue, time.Now()), + newCondition("UwlMetricsCollector", "ForwardFailed", "Metrics failed", metav1.ConditionTrue, time.Now()), + newCondition("Degraded", "ForwardFailed", "UwlMetricsCollector: Metrics failed", metav1.ConditionTrue, time.Now().Add(-time.Minute)), + newCondition("Available", "ForwardSuccessful", "", metav1.ConditionFalse, time.Now()), + }, + expectConditions: []oav1beta1.StatusCondition{ + newCondition("MetricsCollector", "ForwardSuccessful", "Metrics sent", metav1.ConditionTrue, time.Now()), + newCondition("UwlMetricsCollector", "ForwardFailed", "Metrics failed", metav1.ConditionTrue, time.Now()), + newCondition("Available", "ForwardSuccessful", "", metav1.ConditionFalse, time.Now()), + newCondition("Degraded", "ForwardFailed", "UwlMetricsCollector: Metrics failed", metav1.ConditionTrue, 
time.Now().Add(-time.Minute)), + }, + }, + "status is updated if the condition is different": { + spokeAddonConditions: []oav1beta1.StatusCondition{ + newCondition("MetricsCollector", "ForwardFailed", "Metrics failed", metav1.ConditionTrue, time.Now()), + newCondition("Available", "ForwardSuccessful", "MetricsCollector: Metrics sent", metav1.ConditionTrue, time.Now().Add(-time.Minute)), + }, + expectConditions: []oav1beta1.StatusCondition{ + newCondition("MetricsCollector", "ForwardFailed", "Metrics failed", metav1.ConditionTrue, time.Now()), + newCondition("Available", "ForwardSuccessful", "MetricsCollector: Metrics sent", metav1.ConditionFalse, time.Now().Add(-time.Minute)), + newCondition("Degraded", "ForwardFailed", "MetricsCollector: Metrics failed", metav1.ConditionTrue, time.Now()), + }, + }, + "override progressing message": { + spokeAddonConditions: []oav1beta1.StatusCondition{ + newCondition("MetricsCollector", "UpdateSuccessful", "Metrics updated", metav1.ConditionTrue, time.Now()), + }, + expectConditions: []oav1beta1.StatusCondition{ + newCondition("MetricsCollector", "UpdateSuccessful", "Metrics updated", metav1.ConditionTrue, time.Now()), + newCondition("Progressing", "UpdateSuccessful", "observability-controller add-on is progressing.", metav1.ConditionTrue, time.Now()), + }, + }, + "override disabled message": { + spokeAddonConditions: []oav1beta1.StatusCondition{ + newCondition("MetricsCollector", "Disabled", "Metrics disabled", metav1.ConditionTrue, time.Now()), + }, + expectConditions: []oav1beta1.StatusCondition{ + newCondition("MetricsCollector", "Disabled", "Metrics disabled", metav1.ConditionTrue, time.Now()), + newCondition("Degraded", "Disabled", "observability-controller add-on is disabled.", metav1.ConditionTrue, time.Now()), + }, + }, + } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + spokeOba := newObservabilityAddon(addonName, addonNamespace) + spokeOba.Status.Conditions = tc.spokeAddonConditions + 
spokeClient := newClient(spokeOba) + statusReconciler := &status.StatusReconciler{ + Client: spokeClient, + HubClient: reloadableHubClient, + Namespace: addonNamespace, + HubNamespace: addonHubNamespace, + ObsAddonName: addonName, + Logger: logr.Discard(), + } + + resp, err := statusReconciler.Reconcile(context.Background(), ctrl.Request{}) + assert.NoError(t, err) + assert.True(t, resp.IsZero()) + + newSpokeOba := &oav1beta1.ObservabilityAddon{} + err = spokeClient.Get(context.Background(), types.NamespacedName{Name: addonName, Namespace: addonNamespace}, newSpokeOba) + if err != nil { + t.Fatalf("Failed to get oba in spoke: %v", err) + } + + assert.Equal(t, len(tc.expectConditions), len(newSpokeOba.Status.Conditions)) + + sort.Slice(newSpokeOba.Status.Conditions, func(i, j int) bool { + return newSpokeOba.Status.Conditions[i].Type < newSpokeOba.Status.Conditions[j].Type + }) + sort.Slice(tc.expectConditions, func(i, j int) bool { + return tc.expectConditions[i].Type < tc.expectConditions[j].Type + }) + for i := range tc.expectConditions { + assert.Equal(t, tc.expectConditions[i].Type, newSpokeOba.Status.Conditions[i].Type) + assert.Equal(t, tc.expectConditions[i].Reason, newSpokeOba.Status.Conditions[i].Reason) + assert.Equal(t, tc.expectConditions[i].Message, newSpokeOba.Status.Conditions[i].Message) + assert.Equal(t, tc.expectConditions[i].Status, newSpokeOba.Status.Conditions[i].Status) + assert.WithinDuration(t, tc.expectConditions[i].LastTransitionTime.Time, newSpokeOba.Status.Conditions[i].LastTransitionTime.Time, time.Second) + } + }) + } +} + func newClient(objs ...runtime.Object) client.Client { s := scheme.Scheme addonv1alpha1.AddToScheme(s) @@ -319,38 +522,3 @@ func newObservabilityAddon(name string, ns string) *oav1beta1.ObservabilityAddon }, } } - -func addCondition(oba *oav1beta1.ObservabilityAddon, statusType string, status metav1.ConditionStatus) { - condition := oav1beta1.StatusCondition{ - Type: statusType, - Status: status, - Reason: 
"DummyReason", - Message: "DummyMessage", - } - oba.Status.Conditions = append(oba.Status.Conditions, condition) -} - -func newRequest() ctrl.Request { - return ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: "install", - Namespace: testNamespace, - }, - } -} - -func newStatusReconciler(c client.Client, hubReload func() (client.Client, error)) *status.StatusReconciler { - hc, err := util.NewReloadableHubClientWithReloadFunc(hubReload) - if err != nil { - panic(err) - } - - return &status.StatusReconciler{ - Client: c, - HubClient: hc, - Namespace: testNamespace, - HubNamespace: testHubNamespace, - ObsAddonName: obAddonName, - Logger: logr.Discard(), - } -} diff --git a/operators/endpointmetrics/pkg/collector/metrics_collector.go b/operators/endpointmetrics/pkg/collector/metrics_collector.go index 5080adf60..3413f537b 100644 --- a/operators/endpointmetrics/pkg/collector/metrics_collector.go +++ b/operators/endpointmetrics/pkg/collector/metrics_collector.go @@ -29,9 +29,9 @@ import ( "github.com/stolostron/multicluster-observability-operator/operators/endpointmetrics/pkg/openshift" "github.com/stolostron/multicluster-observability-operator/operators/endpointmetrics/pkg/rendering" - "github.com/stolostron/multicluster-observability-operator/operators/endpointmetrics/pkg/status" oav1beta1 "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/api/v1beta1" operatorconfig "github.com/stolostron/multicluster-observability-operator/operators/pkg/config" + "github.com/stolostron/multicluster-observability-operator/operators/pkg/status" "github.com/stolostron/multicluster-observability-operator/operators/pkg/util" ) @@ -103,41 +103,48 @@ type deploymentParams struct { func (m *MetricsCollector) Update(ctx context.Context, req ctrl.Request) error { deployParams, err := m.generateDeployParams(ctx, req) if err != nil { - m.reportStatus(ctx, status.Degraded) + m.reportStatus(ctx, status.MetricsCollector, status.UpdateFailed, 
"Failed to generate deployment parameters") return err } - var mcResult, uwlResult ensureDeploymentResult - if mcResult, err = m.updateMetricsCollector(ctx, false, deployParams); err != nil { - m.reportStatus(ctx, status.Degraded) + if err := m.updateMetricsCollector(ctx, false, deployParams); err != nil { + m.reportStatus(ctx, status.MetricsCollector, status.UpdateFailed, "Failed to update metrics collector") return err + } else { + if m.ObsAddon.Spec.EnableMetrics { + m.reportStatus(ctx, status.MetricsCollector, status.UpdateSuccessful, "Metrics collector updated") + } else { + m.reportStatus(ctx, status.MetricsCollector, status.Disabled, "Metrics collector disabled") + } } isUwl, err := m.isUWLMonitoringEnabled(ctx) if err != nil { - m.reportStatus(ctx, status.Degraded) + m.reportStatus(ctx, status.UwlMetricsCollector, status.UpdateFailed, "Failed to check if UWL monitoring is enabled") return err } uwlMetricsLen := len(deployParams.uwlList.NameList) + len(deployParams.uwlList.MatchList) if isUwl && uwlMetricsLen != 0 { - if uwlResult, err = m.updateMetricsCollector(ctx, true, deployParams); err != nil { - m.reportStatus(ctx, status.Degraded) + if err := m.updateMetricsCollector(ctx, true, deployParams); err != nil { + m.reportStatus(ctx, status.UwlMetricsCollector, status.UpdateFailed, "Failed to update UWL Metrics collector") return err + } else { + if m.ObsAddon.Spec.EnableMetrics { + m.reportStatus(ctx, status.UwlMetricsCollector, status.UpdateSuccessful, "UWL Metrics collector updated") + } else { + m.reportStatus(ctx, status.UwlMetricsCollector, status.Disabled, "UWL Metrics collector disabled") + } } } else { if err := m.deleteMetricsCollector(ctx, true); err != nil { - m.reportStatus(ctx, status.Degraded) + m.reportStatus(ctx, status.UwlMetricsCollector, status.UpdateFailed, err.Error()) return err + } else { + m.reportStatus(ctx, status.UwlMetricsCollector, status.Disabled, "UWL Metrics collector disabled") } } - if mcResult == deploymentCreated || 
uwlResult == deploymentCreated { - m.reportStatus(ctx, status.Deployed) - } else if mcResult == deploymentUpdated && !m.ObsAddon.Spec.EnableMetrics { - m.reportStatus(ctx, status.Disabled) - } - return nil } @@ -153,13 +160,16 @@ func (m *MetricsCollector) Delete(ctx context.Context) error { return nil } -func (m *MetricsCollector) reportStatus(ctx context.Context, conditionReason status.ConditionReason) { +func (m *MetricsCollector) reportStatus(ctx context.Context, component status.Component, conditionReason status.Reason, message string) { if m.ClusterInfo.IsHubMetricsCollector { return } - m.Log.Info("Reporting status", "conditionReason", conditionReason) - if err := status.ReportStatus(ctx, m.Client, conditionReason, m.ObsAddon.Name, m.Namespace); err != nil { + + statusReporter := status.NewStatus(m.Client, m.ObsAddon.Name, m.Namespace, m.Log) + if wasUpdated, err := statusReporter.UpdateComponentCondition(ctx, component, conditionReason, message); err != nil { m.Log.Error(err, "Failed to report status") + } else if wasUpdated { + m.Log.Info("Status reported", "component", component, "conditionReason", conditionReason, "message", message) } } @@ -247,25 +257,24 @@ func (m *MetricsCollector) deleteMetricsCollector(ctx context.Context, isUWL boo return nil } -func (m *MetricsCollector) updateMetricsCollector(ctx context.Context, isUWL bool, deployParams *deploymentParams) (ensureDeploymentResult, error) { +func (m *MetricsCollector) updateMetricsCollector(ctx context.Context, isUWL bool, deployParams *deploymentParams) error { if err := m.ensureService(ctx, isUWL); err != nil { - return "", err + return err } if err := m.ensureServiceMonitor(ctx, isUWL); err != nil { - return "", err + return err } if err := m.ensureAlertingRule(ctx, isUWL); err != nil { - return "", err + return err } - res, err := m.ensureDeployment(ctx, isUWL, deployParams) - if err != nil { - return "", err + if err := m.ensureDeployment(ctx, isUWL, deployParams); err != nil { + return err 
} - return res, nil + return nil } func (m *MetricsCollector) ensureService(ctx context.Context, isUWL bool) error { @@ -504,15 +513,7 @@ func (m *MetricsCollector) ensureAlertingRule(ctx context.Context, isUWL bool) e return nil } -type ensureDeploymentResult string - -const ( - deploymentCreated ensureDeploymentResult = "created" - deploymentUpdated ensureDeploymentResult = "updated" - deploymentNoop ensureDeploymentResult = "noop" -) - -func (m *MetricsCollector) ensureDeployment(ctx context.Context, isUWL bool, deployParams *deploymentParams) (ensureDeploymentResult, error) { +func (m *MetricsCollector) ensureDeployment(ctx context.Context, isUWL bool, deployParams *deploymentParams) error { secretName := metricsCollector if isUWL { secretName = uwlMetricsCollector @@ -744,8 +745,6 @@ func (m *MetricsCollector) ensureDeployment(ctx context.Context, isUWL bool, dep desiredMetricsCollectorDep.Spec.Template.Spec.Containers[0].Resources = *m.ObsAddon.Spec.Resources } - result := deploymentNoop - retryErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { foundMetricsCollectorDep := &appsv1.Deployment{} err := m.Client.Get(ctx, types.NamespacedName{Name: name, Namespace: m.Namespace}, foundMetricsCollectorDep) @@ -755,7 +754,6 @@ func (m *MetricsCollector) ensureDeployment(ctx context.Context, isUWL bool, dep return fmt.Errorf("failed to create Deployment %s/%s: %w", m.Namespace, name, err) } - result = deploymentCreated return nil } if err != nil { @@ -776,7 +774,6 @@ func (m *MetricsCollector) ensureDeployment(ctx context.Context, isUWL bool, dep return fmt.Errorf("failed to update Deployment %s/%s: %w", m.Namespace, name, err) } - result = deploymentUpdated return nil } @@ -784,10 +781,10 @@ func (m *MetricsCollector) ensureDeployment(ctx context.Context, isUWL bool, dep }) if retryErr != nil { - return deploymentNoop, retryErr + return retryErr } - return result, nil + return nil } func (m *MetricsCollector) getCommands(isUSW bool, deployParams 
*deploymentParams) []string { diff --git a/operators/endpointmetrics/pkg/status/status.go b/operators/endpointmetrics/pkg/status/status.go deleted file mode 100644 index b6e39a587..000000000 --- a/operators/endpointmetrics/pkg/status/status.go +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright (c) Red Hat, Inc. -// Copyright Contributors to the Open Cluster Management project -// Licensed under the Apache License 2.0 - -package status - -import ( - "context" - "fmt" - "sort" - "time" - - oav1beta1 "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/api/v1beta1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/util/retry" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -type ConditionReason string - -const ( - Deployed ConditionReason = "Deployed" - Disabled ConditionReason = "Disabled" - Degraded ConditionReason = "Degraded" - NotSupported ConditionReason = "NotSupported" - MaxStatusConditionsCount = 10 -) - -var ( - conditions = map[ConditionReason]oav1beta1.StatusCondition{ - Deployed: { - Type: "Progressing", - Reason: string(Deployed), - Message: "Metrics collector deployed", - Status: metav1.ConditionTrue, - }, - Disabled: { - Type: "Disabled", - Reason: string(Disabled), - Message: "enableMetrics is set to False", - Status: metav1.ConditionTrue, - }, - Degraded: { - Type: "Degraded", - Reason: string(Degraded), - Message: "Metrics collector deployment not successful", - Status: metav1.ConditionTrue, - }, - NotSupported: { - Type: "NotSupported", - Reason: string(NotSupported), - Message: "No Prometheus service found in this cluster", - Status: metav1.ConditionTrue, - }, - } - log = ctrl.Log.WithName("status") -) - -func ReportStatus(ctx context.Context, client client.Client, conditionReason ConditionReason, addonName, addonNs string) error { - newCondition := conditions[conditionReason] - newCondition.LastTransitionTime = 
metav1.NewTime(time.Now()) - - // Fetch the ObservabilityAddon instance in local cluster, and update the status - // Retry on conflict - obsAddon := &oav1beta1.ObservabilityAddon{} - retryErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - if err := client.Get(ctx, types.NamespacedName{Name: addonName, Namespace: addonNs}, obsAddon); err != nil { - return err - } - - if !shouldUpdateConditions(obsAddon.Status.Conditions, newCondition) { - return nil - } - - obsAddon.Status.Conditions = deduplicateConditions(obsAddon.Status.Conditions) - obsAddon.Status.Conditions = resetMainConditionsStatus(obsAddon.Status.Conditions) - obsAddon.Status.Conditions = mutateOrAppend(obsAddon.Status.Conditions, newCondition) - - log.Info(fmt.Sprintf("Updating status of ObservabilityAddon %s/%s", addonNs, addonName), "type", newCondition.Type, "reason", newCondition.Reason) - - if len(obsAddon.Status.Conditions) > MaxStatusConditionsCount { - obsAddon.Status.Conditions = obsAddon.Status.Conditions[len(obsAddon.Status.Conditions)-MaxStatusConditionsCount:] - } - - return client.Status().Update(ctx, obsAddon) - }) - if retryErr != nil { - return retryErr - } - - return nil -} - -// mutateOrAppend updates the status conditions with the new condition. -// If the condition already exists, it updates it with the new condition. -// If the condition does not exist, it appends the new condition to the status conditions. 
-func mutateOrAppend(conditions []oav1beta1.StatusCondition, newCondition oav1beta1.StatusCondition) []oav1beta1.StatusCondition { - if len(conditions) == 0 { - return []oav1beta1.StatusCondition{newCondition} - } - - for i, condition := range conditions { - if condition.Type == newCondition.Type { - // Update the existing condition - conditions[i] = newCondition - return conditions - } - } - // If the condition type does not exist, append the new condition - return append(conditions, newCondition) -} - -// shouldAppendCondition checks if the new condition should be appended to the status conditions -// based on the last condition in the slice. -func shouldUpdateConditions(conditions []oav1beta1.StatusCondition, newCondition oav1beta1.StatusCondition) bool { - if len(conditions) == 0 { - return true - } - - sort.Slice(conditions, func(i, j int) bool { - return conditions[i].LastTransitionTime.Before(&conditions[j].LastTransitionTime) - }) - - lastCondition := conditions[len(conditions)-1] - - return lastCondition.Type != newCondition.Type || - lastCondition.Status != newCondition.Status || - lastCondition.Reason != newCondition.Reason || - lastCondition.Message != newCondition.Message -} - -// deduplicateConditions removes duplicate conditions from the list of conditions. -// It removes duplicated conditions introduced by PR #1427. 
-func deduplicateConditions(conditions []oav1beta1.StatusCondition) []oav1beta1.StatusCondition { - conditionMap := make(map[string]oav1beta1.StatusCondition) - for _, condition := range conditions { - if v, ok := conditionMap[condition.Type]; ok { - if condition.LastTransitionTime.After(v.LastTransitionTime.Time) { - conditionMap[condition.Type] = condition - } - } else { - conditionMap[condition.Type] = condition - } - } - - deduplicatedConditions := []oav1beta1.StatusCondition{} - for _, condition := range conditionMap { - deduplicatedConditions = append(deduplicatedConditions, condition) - } - - return deduplicatedConditions -} - -func resetMainConditionsStatus(conditions []oav1beta1.StatusCondition) []oav1beta1.StatusCondition { - for i := range conditions { - if conditions[i].Type == "Available" || conditions[i].Type == "Degraded" || conditions[i].Type == "Progressing" { - conditions[i].Status = metav1.ConditionFalse - } - } - return conditions -} diff --git a/operators/multiclusterobservability/controllers/placementrule/placementrule_controller.go b/operators/multiclusterobservability/controllers/placementrule/placementrule_controller.go index 60702da9c..088c1dfd1 100644 --- a/operators/multiclusterobservability/controllers/placementrule/placementrule_controller.go +++ b/operators/multiclusterobservability/controllers/placementrule/placementrule_controller.go @@ -73,6 +73,9 @@ type PlacementRuleReconciler struct { Scheme *runtime.Scheme CRDMap map[string]bool RESTMapper meta.RESTMapper + + statusIsInitialized bool + statusMu sync.Mutex } // +kubebuilder:rbac:groups=observability.open-cluster-management.io,resources=placementrules,verbs=get;list;watch;create;update;patch;delete @@ -261,12 +264,17 @@ func (r *PlacementRuleReconciler) Reconcile(ctx context.Context, req ctrl.Reques } // only update managedclusteraddon status when obs addon's status updated - if req.Name == obsAddonName { - err = updateAddonStatus(r.Client, *obsAddonList) + // ensure the status 
is updated once in the reconcile loop when the controller starts + r.statusMu.Lock() + if req.Name == obsAddonName || !r.statusIsInitialized { + r.statusIsInitialized = true + err = updateAddonStatus(ctx, r.Client, *obsAddonList) if err != nil { + r.statusMu.Unlock() return ctrl.Result{}, err } } + r.statusMu.Unlock() if deleteAll { // delete managedclusteraddon for local-cluster diff --git a/operators/multiclusterobservability/controllers/placementrule/status.go b/operators/multiclusterobservability/controllers/placementrule/status.go index a8efda6a9..469961cfc 100644 --- a/operators/multiclusterobservability/controllers/placementrule/status.go +++ b/operators/multiclusterobservability/controllers/placementrule/status.go @@ -6,11 +6,12 @@ package placementrule import ( "context" - "reflect" + "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" mcov1beta1 "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/api/v1beta1" @@ -18,60 +19,61 @@ import ( addonv1alpha1 "open-cluster-management.io/api/addon/v1alpha1" ) -var ( - statusMap = map[string]string{ - "Available": "Available", - "Progressing": "Progressing", - "Deployed": "Progressing", - "Disabled": "Degraded", - "Degraded": "Degraded", - "NotSupported": "Degraded", - } -) - -func updateAddonStatus(c client.Client, addonList mcov1beta1.ObservabilityAddonList) error { +func updateAddonStatus(ctx context.Context, c client.Client, addonList mcov1beta1.ObservabilityAddonList) error { for _, addon := range addonList.Items { if addon.Status.Conditions == nil || len(addon.Status.Conditions) == 0 { continue } - conditions := []metav1.Condition{} - for _, c := range addon.Status.Conditions { - condition := metav1.Condition{ - Type: statusMap[c.Type], - Status: c.Status, - LastTransitionTime: 
c.LastTransitionTime, - Reason: c.Reason, - Message: c.Message, - } - conditions = append(conditions, condition) - } - managedclusteraddon := &addonv1alpha1.ManagedClusterAddOn{} - err := c.Get(context.TODO(), types.NamespacedName{ - Name: util.ManagedClusterAddonName, - Namespace: addon.ObjectMeta.Namespace, - }, managedclusteraddon) - if err != nil { - if errors.IsNotFound(err) { - log.Info("managedclusteraddon does not exist", "namespace", addon.ObjectMeta.Namespace) - continue - } - log.Error(err, "Failed to get managedclusteraddon", "namespace", addon.ObjectMeta.Namespace) - return err - } - if !reflect.DeepEqual(conditions, managedclusteraddon.Status.Conditions) { - managedclusteraddon.Status.Conditions = conditions - err = c.Status().Update(context.TODO(), managedclusteraddon) + obsAddonConditions := convertConditionsToMeta(addon.Status.Conditions) + isUpdated := false + retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { + managedclusteraddon := &addonv1alpha1.ManagedClusterAddOn{} + err := c.Get(ctx, types.NamespacedName{ + Name: util.ManagedClusterAddonName, + Namespace: addon.ObjectMeta.Namespace, + }, managedclusteraddon) if err != nil { - log.Error( - err, - "Failed to update status for managedclusteraddon", - "namespace", - addon.ObjectMeta.Namespace, - ) + if errors.IsNotFound(err) { + log.Info("managedclusteraddon does not exist", "namespace", addon.ObjectMeta.Namespace, "name", util.ManagedClusterAddonName) + return nil + } + log.Error(err, "Failed to get managedclusteraddon", "namespace", addon.ObjectMeta.Namespace, "name", util.ManagedClusterAddonName) return err } + + if equality.Semantic.DeepEqual(obsAddonConditions, managedclusteraddon.Status.Conditions) { + return nil + } + + managedclusteraddon.Status.Conditions = obsAddonConditions + isUpdated = true + + return c.Status().Update(context.TODO(), managedclusteraddon) + }) + if retryErr != nil { + log.Error(retryErr, "Failed to update status for managedclusteraddon", 
"namespace", addon.ObjectMeta.Namespace) + return retryErr + } + + if isUpdated { log.Info("Updated status for managedclusteraddon", "namespace", addon.ObjectMeta.Namespace) } } + return nil } + +func convertConditionsToMeta(conditions []mcov1beta1.StatusCondition) []metav1.Condition { + var metaConditions []metav1.Condition + for _, c := range conditions { + metaCondition := metav1.Condition{ + Type: c.Type, + Status: c.Status, + LastTransitionTime: c.LastTransitionTime, + Reason: c.Reason, + Message: c.Message, + } + metaConditions = append(metaConditions, metaCondition) + } + return metaConditions +} diff --git a/operators/multiclusterobservability/controllers/placementrule/status_test.go b/operators/multiclusterobservability/controllers/placementrule/status_test.go index 5bd91570e..17f803b18 100644 --- a/operators/multiclusterobservability/controllers/placementrule/status_test.go +++ b/operators/multiclusterobservability/controllers/placementrule/status_test.go @@ -6,6 +6,7 @@ package placementrule import ( "context" + "slices" "testing" "time" @@ -18,67 +19,158 @@ import ( mcov1beta1 "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/api/v1beta1" mcov1beta2 "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/api/v1beta2" "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/pkg/util" + "github.com/stretchr/testify/assert" ) func TestUpdateAddonStatus(t *testing.T) { - maddon := &addonv1alpha1.ManagedClusterAddOn{ - ObjectMeta: metav1.ObjectMeta{ - Name: util.ManagedClusterAddonName, - Namespace: namespace, + testCases := map[string]struct { + currentObsAddonConditions []mcov1beta1.StatusCondition + currentClusterAddonConditions []metav1.Condition + expectedClusterAddonConditions []metav1.Condition + isUpdated bool + }{ + "updated addon conditions should be applied": { + currentObsAddonConditions: []mcov1beta1.StatusCondition{ + { + 
Type: "Available", + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.NewTime(time.Now()), + Reason: "Deployed", + Message: "It is deployed", + }, + }, + currentClusterAddonConditions: []metav1.Condition{}, + expectedClusterAddonConditions: []metav1.Condition{ + { + Type: "Available", + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.NewTime(time.Now()), + Reason: "Deployed", + Message: "It is deployed", + }, + }, + isUpdated: true, }, - Status: addonv1alpha1.ManagedClusterAddOnStatus{}, + "same conditions should not be updated": { + currentObsAddonConditions: []mcov1beta1.StatusCondition{ + { + Type: "Available", + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.NewTime(time.Unix(1e9, 0)), + Reason: "Deployed", + Message: "It is deployed", + }, + }, + currentClusterAddonConditions: []metav1.Condition{ + { + Type: "Available", + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.NewTime(time.Unix(1e9, 0)), + Reason: "Deployed", + Message: "It is deployed", + }, + }, + expectedClusterAddonConditions: []metav1.Condition{ + { + Type: "Available", + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.NewTime(time.Unix(1e9, 0)), + Reason: "Deployed", + Message: "It is deployed", + }, + }, + isUpdated: false, + }, + } + + sortConditionsFunc := func(a, b metav1.Condition) int { + if a.Type < b.Type { + return -1 + } + if a.Type > b.Type { + return 1 + } + return 0 } - scheme := runtime.NewScheme() - addonv1alpha1.AddToScheme(scheme) - mcov1beta1.AddToScheme(scheme) - mcov1beta2.AddToScheme(scheme) - objs := []runtime.Object{maddon} - c := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(objs...). - WithStatusSubresource( - &addonv1alpha1.ManagedClusterAddOn{}, - &mcov1beta2.MultiClusterObservability{}, - &mcov1beta1.ObservabilityAddon{}, - ). 
- Build() + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + scheme := runtime.NewScheme() + addonv1alpha1.AddToScheme(scheme) + mcov1beta1.AddToScheme(scheme) + mcov1beta2.AddToScheme(scheme) - addonList := &mcov1beta1.ObservabilityAddonList{ - Items: []mcov1beta1.ObservabilityAddon{ - { + clusterAddon := &addonv1alpha1.ManagedClusterAddOn{ ObjectMeta: metav1.ObjectMeta{ - Name: obsAddonName, + Name: util.ManagedClusterAddonName, Namespace: namespace, }, - Status: mcov1beta1.ObservabilityAddonStatus{ - Conditions: []mcov1beta1.StatusCondition{ - { - Type: "Available", - Status: metav1.ConditionTrue, - LastTransitionTime: metav1.NewTime(time.Now()), - Reason: "Deployed", - Message: "Metrics collector deployed and functional", + Status: addonv1alpha1.ManagedClusterAddOnStatus{ + Conditions: tc.currentClusterAddonConditions, + }, + } + + c := fake.NewClientBuilder(). + WithScheme(scheme). + WithRuntimeObjects(clusterAddon). + WithStatusSubresource( + &addonv1alpha1.ManagedClusterAddOn{}, + &mcov1beta2.MultiClusterObservability{}, + &mcov1beta1.ObservabilityAddon{}, + ). 
+ Build() + + addonList := mcov1beta1.ObservabilityAddonList{ + Items: []mcov1beta1.ObservabilityAddon{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: obsAddonName, + Namespace: namespace, + }, + Status: mcov1beta1.ObservabilityAddonStatus{ + Conditions: tc.currentObsAddonConditions, }, }, }, - }, - }, - } + } - err := updateAddonStatus(c, *addonList) - if err != nil { - t.Fatalf("Failed to update status for managedclusteraddon: (%v)", err) - } + foundClusterAddon := &addonv1alpha1.ManagedClusterAddOn{} + if err := c.Get(context.Background(), types.NamespacedName{ + Name: util.ManagedClusterAddonName, + Namespace: namespace, + }, foundClusterAddon); err != nil { + t.Fatalf("Failed to get managedclusteraddon: (%v)", err) + } + initVersion := foundClusterAddon.ResourceVersion - err = c.Get(context.TODO(), types.NamespacedName{ - Name: util.ManagedClusterAddonName, - Namespace: namespace, - }, maddon) - if err != nil { - t.Fatalf("Failed to get managedclusteraddon: (%v)", err) - } - if maddon.Status.Conditions == nil || len(maddon.Status.Conditions) != 1 { - t.Fatalf("Status not updated correctly in managedclusteraddon: (%v)", maddon) + err := updateAddonStatus(context.Background(), c, addonList) + if err != nil { + t.Fatalf("Failed to update status for managedclusteraddon: (%v)", err) + } + + if err := c.Get(context.Background(), types.NamespacedName{ + Name: util.ManagedClusterAddonName, + Namespace: namespace, + }, foundClusterAddon); err != nil { + t.Fatalf("Failed to get managedclusteraddon: (%v)", err) + } + + if tc.isUpdated { + assert.NotEqual(t, initVersion, foundClusterAddon.ResourceVersion) + } else { + assert.Equal(t, initVersion, foundClusterAddon.ResourceVersion) + } + + slices.SortFunc(foundClusterAddon.Status.Conditions, sortConditionsFunc) + slices.SortFunc(tc.expectedClusterAddonConditions, sortConditionsFunc) + assert.Equal(t, len(tc.expectedClusterAddonConditions), len(foundClusterAddon.Status.Conditions)) + for i := range 
tc.expectedClusterAddonConditions { + assert.Equal(t, tc.expectedClusterAddonConditions[i].Type, foundClusterAddon.Status.Conditions[i].Type) + assert.Equal(t, tc.expectedClusterAddonConditions[i].Status, foundClusterAddon.Status.Conditions[i].Status) + assert.Equal(t, tc.expectedClusterAddonConditions[i].Reason, foundClusterAddon.Status.Conditions[i].Reason) + assert.Equal(t, tc.expectedClusterAddonConditions[i].Message, foundClusterAddon.Status.Conditions[i].Message) + assert.InEpsilon(t, tc.expectedClusterAddonConditions[i].LastTransitionTime.Unix(), foundClusterAddon.Status.Conditions[i].LastTransitionTime.Unix(), 1) + } + }) } } diff --git a/operators/pkg/status/status.go b/operators/pkg/status/status.go new file mode 100644 index 000000000..ff410ba41 --- /dev/null +++ b/operators/pkg/status/status.go @@ -0,0 +1,186 @@ +// Copyright (c) Red Hat, Inc. +// Copyright Contributors to the Open Cluster Management project +// Licensed under the Apache License 2.0 + +package status + +import ( + "context" + "fmt" + "time" + + "github.com/go-logr/logr" + oav1beta1 "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/api/v1beta1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/retry" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// Component defines the components of the ObservabilityAddon +// each reports its own status condition +type Component string + +const ( + MetricsCollector Component = "MetricsCollector" + UwlMetricsCollector Component = "UwlMetricsCollector" +) + +// Reason defines the Reason for the status condition +type Reason string + +var ( + // When adding a new Reason, make sure to update the status controller package + // to aggregate correctly the status of the ObservabilityAddon + UpdateSuccessful Reason = "UpdateSuccessful" + UpdateFailed Reason = "UpdateFailed" + ForwardSuccessful Reason = "ForwardSuccessful" + ForwardFailed Reason =
"ForwardFailed" + Disabled Reason = "Disabled" + NotSupported Reason = "NotSupported" +) + +var ( + // componentTransitions defines the valid transitions between component conditions + componentTransitions = map[Reason]map[Reason]struct{}{ + UpdateSuccessful: { + UpdateFailed: {}, + ForwardSuccessful: {}, + ForwardFailed: {}, + Disabled: {}, + NotSupported: {}, + }, + UpdateFailed: { + UpdateSuccessful: {}, + Disabled: {}, + NotSupported: {}, + }, + ForwardSuccessful: { + ForwardFailed: {}, + UpdateSuccessful: {}, + UpdateFailed: {}, + Disabled: {}, + NotSupported: {}, + }, + ForwardFailed: { + ForwardSuccessful: {}, + UpdateSuccessful: {}, + UpdateFailed: {}, + Disabled: {}, + NotSupported: {}, + }, + Disabled: { + UpdateSuccessful: {}, + UpdateFailed: {}, + NotSupported: {}, + }, + NotSupported: { + UpdateSuccessful: {}, + UpdateFailed: {}, + Disabled: {}, + }, + } +) + +// Status provides a method to update the status of the ObservabilityAddon for a specific component +type Status struct { + client client.Client + addonName string + addonNs string + logger logr.Logger +} + +// NewStatus creates a new Status instance +func NewStatus(client client.Client, addonName, addonNs string, logger logr.Logger) Status { + return Status{ + client: client, + addonName: addonName, + addonNs: addonNs, + logger: logger, + } +} + +// UpdateComponentCondition updates the status condition of a specific component of the ObservabilityAddon +// It returns an error if the update fails for a permanent reason or after exhausting retries on conflict. +// It will also return an error if the transition between conditions is invalid, to avoid flapping. +// Finally it returns a boolean indicating if the condition was updated or not. This is useful to avoid +// unnecessary logging when the condition is not updated because it is the same as the current one.
+func (s Status) UpdateComponentCondition(ctx context.Context, componentName Component, newReason Reason, newMessage string) (bool, error) { + var wasUpdated bool + retryErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + addon, err := s.fetchAddon(ctx) + if err != nil { + return err + } + + newCondition := oav1beta1.StatusCondition{ + Type: string(componentName), + Reason: string(newReason), + Message: newMessage, + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.NewTime(time.Now()), + } + + currentCondition := getConditionByType(addon.Status.Conditions, string(componentName)) + + // check if the condition needs to be updated + isSameCondition := currentCondition != nil && currentCondition.Reason == newCondition.Reason && currentCondition.Message == newCondition.Message && currentCondition.Status == newCondition.Status + if isSameCondition { + return nil + } + + // check if the transition is valid for the component + // this is to avoid flapping between conditions + if currentCondition != nil { + if _, ok := componentTransitions[Reason(currentCondition.Reason)][newReason]; !ok { + return fmt.Errorf("invalid transition from %s to %s for component %s", currentCondition.Reason, newReason, componentName) + } + } + + addon.Status.Conditions = mutateOrAppend(addon.Status.Conditions, newCondition) + wasUpdated = true + + return s.client.Status().Update(ctx, addon) + }) + if retryErr != nil { + return wasUpdated, retryErr + } + + return wasUpdated, nil +} + +func (s Status) fetchAddon(ctx context.Context) (*oav1beta1.ObservabilityAddon, error) { + obsAddon := &oav1beta1.ObservabilityAddon{} + if err := s.client.Get(ctx, types.NamespacedName{Name: s.addonName, Namespace: s.addonNs}, obsAddon); err != nil { + return nil, fmt.Errorf("failed to get ObservabilityAddon %s/%s: %w", s.addonNs, s.addonName, err) + } + return obsAddon, nil +} + +func getConditionByType(conditions []oav1beta1.StatusCondition, conditionType string) 
*oav1beta1.StatusCondition { + for _, condition := range conditions { + if condition.Type == conditionType { + return &condition + } + } + return nil +} + +// mutateOrAppend updates the status conditions with the new condition. +// If the condition already exists, it updates it with the new condition. +// If the condition does not exist, it appends the new condition to the status conditions. +func mutateOrAppend(conditions []oav1beta1.StatusCondition, newCondition oav1beta1.StatusCondition) []oav1beta1.StatusCondition { + if len(conditions) == 0 { + return []oav1beta1.StatusCondition{newCondition} + } + + for i, condition := range conditions { + if condition.Type == newCondition.Type { + // Update the existing condition + conditions[i] = newCondition + return conditions + } + } + // If the condition type does not exist, append the new condition + return append(conditions, newCondition) +} diff --git a/operators/endpointmetrics/pkg/status/status_test.go b/operators/pkg/status/status_test.go similarity index 53% rename from operators/endpointmetrics/pkg/status/status_test.go rename to operators/pkg/status/status_test.go index d5227448a..a82d1b9b1 100644 --- a/operators/endpointmetrics/pkg/status/status_test.go +++ b/operators/pkg/status/status_test.go @@ -2,7 +2,7 @@ // Copyright Contributors to the Open Cluster Management project // Licensed under the Apache License 2.0 -package status_test +package status import ( "context" @@ -10,7 +10,7 @@ import ( "testing" "time" - "github.com/stolostron/multicluster-observability-operator/operators/endpointmetrics/pkg/status" + "github.com/go-logr/logr" oav1beta1 "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/api/v1beta1" mcov1beta2 "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/api/v1beta2" "github.com/stretchr/testify/assert" @@ -24,46 +24,45 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/fake" ) -func 
newObservabilityAddon(name string, ns string) *oav1beta1.ObservabilityAddon { - return &oav1beta1.ObservabilityAddon{ - TypeMeta: metav1.TypeMeta{ - APIVersion: oav1beta1.GroupVersion.String(), - Kind: "ObservabilityAddon", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: ns, - }, - } -} - func TestReportStatus(t *testing.T) { s := scheme.Scheme assert.NoError(t, oav1beta1.AddToScheme(s)) assert.NoError(t, addonv1alpha1.AddToScheme(s)) assert.NoError(t, mcov1beta2.AddToScheme(s)) + type updateParams struct { + component Component + reason Reason + message string + } + testCases := map[string]struct { currentConditions []oav1beta1.StatusCondition - newCondition status.ConditionReason - expects func(*testing.T, []oav1beta1.StatusCondition) + updateParams updateParams + expects func(*testing.T, bool, error, []oav1beta1.StatusCondition) }{ "new status should be appended": { currentConditions: []oav1beta1.StatusCondition{}, - newCondition: status.Deployed, - expects: func(t *testing.T, conditions []oav1beta1.StatusCondition) { + updateParams: updateParams{ + component: "MetricsCollector", + reason: UpdateSuccessful, + message: "Metrics collector updated", + }, + expects: func(t *testing.T, wasUpdated bool, updateErr error, conditions []oav1beta1.StatusCondition) { + assert.NoError(t, updateErr) assert.Len(t, conditions, 1) - assert.EqualValues(t, status.Deployed, conditions[0].Reason) + assert.EqualValues(t, UpdateSuccessful, conditions[0].Reason) assert.Equal(t, metav1.ConditionTrue, conditions[0].Status) - assert.Equal(t, "Progressing", conditions[0].Type) + assert.Equal(t, "MetricsCollector", conditions[0].Type) assert.InEpsilon(t, time.Now().Unix(), conditions[0].LastTransitionTime.Unix(), 1) + assert.True(t, wasUpdated) }, }, "existing status should be updated": { currentConditions: []oav1beta1.StatusCondition{ { - Type: "Progressing", - Reason: string(status.Deployed), + Type: "MetricsCollector", + Reason: string(ForwardSuccessful), Message: 
"Metrics collector deployed", Status: metav1.ConditionTrue, LastTransitionTime: metav1.Time{ @@ -71,109 +70,97 @@ func TestReportStatus(t *testing.T) { }, }, { - Type: "Disabled", - Reason: string(status.Disabled), - Message: "enableMetrics is set to False", + Type: "Available", + Reason: string(ForwardSuccessful), + Message: "Metrics collector available", Status: metav1.ConditionTrue, LastTransitionTime: metav1.Time{ Time: time.Now().Add(-2 * time.Minute), }, }, }, - newCondition: status.Disabled, - expects: func(t *testing.T, conditions []oav1beta1.StatusCondition) { - assert.Len(t, conditions, 2) - found := false + updateParams: updateParams{ + component: "MetricsCollector", + reason: UpdateFailed, + message: "Metrics collector disabled", + }, + expects: func(t *testing.T, wasUpdated bool, updateErr error, conditions []oav1beta1.StatusCondition) { + assert.NoError(t, updateErr) + condMap := make(map[string]oav1beta1.StatusCondition) for _, c := range conditions { - if c.Reason == string(status.Disabled) { - found = true - assert.EqualValues(t, status.Disabled, c.Reason) - assert.Equal(t, metav1.ConditionTrue, c.Status) - assert.Equal(t, "Disabled", c.Type) - assert.InEpsilon(t, time.Now().Unix(), c.LastTransitionTime.Unix(), 1) - } else { - // other condition should not be changed - assert.EqualValues(t, status.Deployed, c.Reason) - assert.InEpsilon(t, time.Now().Add(-time.Minute).Unix(), c.LastTransitionTime.Unix(), 1) - } + condMap[c.Type] = c } - assert.True(t, found, "condition not found") + assert.Len(t, condMap, 2) + mcCond := condMap["MetricsCollector"] + assert.EqualValues(t, UpdateFailed, mcCond.Reason) + assert.Equal(t, metav1.ConditionTrue, mcCond.Status) + assert.InEpsilon(t, time.Now().Unix(), mcCond.LastTransitionTime.Unix(), 1) + availCond := condMap["Available"] + assert.EqualValues(t, ForwardSuccessful, availCond.Reason) + assert.InEpsilon(t, time.Now().Add(-2*time.Minute).Unix(), availCond.LastTransitionTime.Unix(), 1) + assert.True(t, 
wasUpdated) }, }, "existing status should not be updated if same": { currentConditions: []oav1beta1.StatusCondition{ { - Type: "Progressing", - Reason: string(status.Deployed), + Type: "MetricsCollector", + Reason: string(ForwardSuccessful), Message: "Metrics collector deployed", Status: metav1.ConditionTrue, LastTransitionTime: metav1.Time{ - Time: time.Now().Add(-time.Minute), // current state (most recent) + Time: time.Now().Add(-3 * time.Minute), // current state (most recent) }, }, { - Type: "Disabled", + Type: "Available", + Reason: string(ForwardSuccessful), + Message: "Metrics collector available", + Status: metav1.ConditionTrue, LastTransitionTime: metav1.Time{ Time: time.Now().Add(-2 * time.Minute), }, }, }, - newCondition: status.Deployed, - expects: func(t *testing.T, conditions []oav1beta1.StatusCondition) { - assert.Len(t, conditions, 2) - assert.EqualValues(t, status.Deployed, conditions[0].Reason) - assert.InEpsilon(t, time.Now().Add(-time.Minute).Unix(), conditions[0].LastTransitionTime.Unix(), 1) + updateParams: updateParams{ + component: "MetricsCollector", + reason: ForwardSuccessful, + message: "Metrics collector deployed", }, - }, - "number of conditions should not exceed MaxStatusConditionsCount": { - currentConditions: []oav1beta1.StatusCondition{ - {Type: "1"}, {Type: "2"}, {Type: "3"}, {Type: "4"}, {Type: "5"}, - {Type: "6"}, {Type: "7"}, {Type: "8"}, {Type: "9"}, {Type: "10"}, - }, - newCondition: status.Deployed, - expects: func(t *testing.T, conditions []oav1beta1.StatusCondition) { - assert.Len(t, conditions, status.MaxStatusConditionsCount) - assert.EqualValues(t, status.Deployed, conditions[len(conditions)-1].Reason) - }, - }, - "duplicated conditions should be removed": { - currentConditions: []oav1beta1.StatusCondition{ - {Type: "Progressing", LastTransitionTime: metav1.Time{Time: time.Now()}}, // most recent duplicated condition - {Type: "Degraded", LastTransitionTime: metav1.Time{Time: time.Now().Add(-time.Minute)}}, - {Type: 
"Progressing", LastTransitionTime: metav1.Time{Time: time.Now().Add(-time.Minute)}}, - {Type: "Degraded", LastTransitionTime: metav1.Time{Time: time.Now().Add(-time.Minute)}}, - }, - newCondition: status.Deployed, - expects: func(t *testing.T, conditions []oav1beta1.StatusCondition) { - assert.Len(t, conditions, 2) + expects: func(t *testing.T, wasUpdated bool, updateErr error, conditions []oav1beta1.StatusCondition) { + assert.NoError(t, updateErr) + condMap := make(map[string]oav1beta1.StatusCondition) for _, c := range conditions { - switch c.Type { - case "Progressing": - assert.InEpsilon(t, time.Now().Unix(), c.LastTransitionTime.Unix(), 1) - case "Degraded": - assert.InEpsilon(t, time.Now().Add(-time.Minute).Unix(), c.LastTransitionTime.Unix(), 1) - default: - t.Fatalf("unexpected condition type: %s", c.Type) - } + condMap[c.Type] = c } + assert.Len(t, condMap, 2) + mcCond := condMap["MetricsCollector"] + // check that the time has not been updated + assert.InEpsilon(t, time.Now().Add(-3*time.Minute).Unix(), mcCond.LastTransitionTime.Unix(), 1) + assert.False(t, wasUpdated) }, }, - "only one of the main conditions should be true": { + "invalid transitions should be rejected": { currentConditions: []oav1beta1.StatusCondition{ - {Type: "Progressing", Status: metav1.ConditionTrue}, - {Type: "Degraded", Status: metav1.ConditionTrue}, - {Type: "Available", Status: metav1.ConditionTrue}, + { + Type: "MetricsCollector", + Reason: string(UpdateFailed), + Message: "Metrics collector broken", + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.Time{ + Time: time.Now().Add(-time.Minute), // current state (most recent) + }, + }, }, - newCondition: status.Deployed, - expects: func(t *testing.T, conditions []oav1beta1.StatusCondition) { - assert.Len(t, conditions, 3) - for _, c := range conditions { - if c.Type == "Progressing" { - assert.Equal(t, metav1.ConditionTrue, c.Status) - } else { - assert.Equal(t, metav1.ConditionFalse, c.Status) - } - } + updateParams: 
updateParams{ + component: "MetricsCollector", + reason: ForwardSuccessful, + message: "Metrics collector is now working", + }, + expects: func(t *testing.T, wasUpdated bool, resultErr error, conditions []oav1beta1.StatusCondition) { + assert.Len(t, conditions, 1) + assert.Error(t, resultErr) + assert.Contains(t, resultErr.Error(), "invalid transition") }, }, } @@ -191,19 +178,14 @@ func TestReportStatus(t *testing.T) { } // test - if err := status.ReportStatus(context.Background(), client, tc.newCondition, baseAddon.Name, baseAddon.Namespace); err != nil { - t.Fatalf("Error reporting status: %v", err) - } + statusUpdater := NewStatus(client, baseAddon.Name, baseAddon.Namespace, logr.Logger{}) + wasUpdated, updateErr := statusUpdater.UpdateComponentCondition(context.Background(), tc.updateParams.component, tc.updateParams.reason, tc.updateParams.message) + newAddon := &oav1beta1.ObservabilityAddon{} if err := client.Get(context.Background(), types.NamespacedName{Name: baseAddon.Name, Namespace: baseAddon.Namespace}, newAddon); err != nil { t.Fatalf("Error getting observabilityaddon: (%v)", err) } - tc.expects(t, newAddon.Status.Conditions) - - // cleanup - if err := client.Delete(context.Background(), newAddon); err != nil { - t.Fatalf("Error deleting observabilityaddon: %v", err) - } + tc.expects(t, wasUpdated, updateErr, newAddon.Status.Conditions) }) } } @@ -218,12 +200,27 @@ func TestReportStatus_Conflict(t *testing.T) { fakeClient := fake.NewClientBuilder().WithScheme(s).WithRuntimeObjects(oa).Build() conflictErr := errors.NewConflict(schema.GroupResource{Group: oav1beta1.GroupVersion.Group, Resource: "resource"}, name, fmt.Errorf("conflict")) - c := newClientWithUpdateError(fakeClient, conflictErr) - if err := status.ReportStatus(context.Background(), c, status.Deployed, name, testNamespace); err == nil { - t.Fatalf("Conflict error should be retried and return an error if it fails") + client := newClientWithUpdateError(fakeClient, conflictErr) + statusUpdater 
:= NewStatus(client, name, testNamespace, logr.Logger{}) + if _, err := statusUpdater.UpdateComponentCondition(context.Background(), "MetricsCollector", UpdateSuccessful, "Metrics collector updated"); err == nil { t.Fatalf("Conflict error should be retried and return an error if it fails") } - if c.UpdateCallsCount() <= 1 { - t.Errorf("Conflict error should be retried, called %d times", c.UpdateCallsCount()) + + if client.UpdateCallsCount() <= 1 { + t.Errorf("Conflict error should be retried, called %d times", client.UpdateCallsCount()) + } +} + +func newObservabilityAddon(name string, ns string) *oav1beta1.ObservabilityAddon { + return &oav1beta1.ObservabilityAddon{ + TypeMeta: metav1.TypeMeta{ + APIVersion: oav1beta1.GroupVersion.String(), + Kind: "ObservabilityAddon", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: ns, + }, } } diff --git a/proxy/pkg/util/util_test.go b/proxy/pkg/util/util_test.go index d40570055..b1a02c6da 100644 --- a/proxy/pkg/util/util_test.go +++ b/proxy/pkg/util/util_test.go @@ -751,6 +751,7 @@ func TestUpdateAllManagedClusterLabelNames(t *testing.T) { // The label list does not appear to be deterministically sorted // Sorting here in order to ensure the test can pass reliably. slices.Sort(syncLabelList.RegexLabelList) + slices.Sort(expectedRegexList) if !reflect.DeepEqual(syncLabelList.RegexLabelList, expectedRegexList) { t.Errorf("syncLabelList.RegexLabelList = %v, want %v", syncLabelList.RegexLabelList, expectedRegexList) }