From 0cd890fb82421dbac6b163a3b736db25194550d0 Mon Sep 17 00:00:00 2001 From: Jacob Baungard Hansen Date: Thu, 14 Nov 2024 12:51:00 +0100 Subject: [PATCH 1/3] Render: fail if no oauth image is found For some unknown reason, during upgrades from ACM 2.11, the template image is briefly used, instead of the image from the openshift imagestream. The template image is not available in disconnected environments, and as a result causes the pods be unable to come up. This is a problem especially for alertmanager. Because of it being a statefulset when it gets into the bad state, with the wrong image, it doesn't automatically recover on the next reconcile. This requires manual intervention to fix. Instead with this PR, we make the reconcile fail if we do not find the oauth image. This will make it retry later, when the imagestream is able to be found. Signed-off-by: Jacob Baungard Hansen --- .../pkg/rendering/renderer_alertmanager.go | 2 ++ .../pkg/rendering/renderer_grafana.go | 4 ++++ .../multiclusterobservability/pkg/rendering/renderer_proxy.go | 3 +++ 3 files changed, 9 insertions(+) diff --git a/operators/multiclusterobservability/pkg/rendering/renderer_alertmanager.go b/operators/multiclusterobservability/pkg/rendering/renderer_alertmanager.go index da951cb7c..978feb25a 100644 --- a/operators/multiclusterobservability/pkg/rendering/renderer_alertmanager.go +++ b/operators/multiclusterobservability/pkg/rendering/renderer_alertmanager.go @@ -126,6 +126,8 @@ func (r *MCORenderer) renderAlertManagerStatefulSet(res *resource.Resource, name found, image = mcoconfig.GetOauthProxyImage(r.imageClient) if found { oauthProxyContainer.Image = image + } else { + return nil, fmt.Errorf("failed to get OAuth image for alertmanager") } oauthProxyContainer.ImagePullPolicy = imagePullPolicy diff --git a/operators/multiclusterobservability/pkg/rendering/renderer_grafana.go b/operators/multiclusterobservability/pkg/rendering/renderer_grafana.go index d60e8af23..49ef1de51 100644 --- 
a/operators/multiclusterobservability/pkg/rendering/renderer_grafana.go +++ b/operators/multiclusterobservability/pkg/rendering/renderer_grafana.go @@ -5,6 +5,8 @@ package rendering import ( + "fmt" + v1 "k8s.io/api/apps/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" @@ -71,6 +73,8 @@ func (r *MCORenderer) renderGrafanaDeployments(res *resource.Resource, found, image = config.GetOauthProxyImage(r.imageClient) if found { spec.Containers[2].Image = image + } else { + return nil, fmt.Errorf("failed to get OAuth image for Grafana") } spec.Containers[2].ImagePullPolicy = imagePullPolicy diff --git a/operators/multiclusterobservability/pkg/rendering/renderer_proxy.go b/operators/multiclusterobservability/pkg/rendering/renderer_proxy.go index 4c9ddf3e5..d0cff6f17 100644 --- a/operators/multiclusterobservability/pkg/rendering/renderer_proxy.go +++ b/operators/multiclusterobservability/pkg/rendering/renderer_proxy.go @@ -5,6 +5,7 @@ package rendering import ( + "fmt" "strings" v1 "k8s.io/api/apps/v1" @@ -96,6 +97,8 @@ func (r *MCORenderer) renderProxyDeployment(res *resource.Resource, found, image = mcoconfig.GetOauthProxyImage(r.imageClient) if found { spec.Containers[1].Image = image + } else { + return nil, fmt.Errorf("failed to get OAuth image for rbacqueryproxy") } for idx := range spec.Volumes { From 7e1868f07a8ff7f2447985685ce21ed11ce7571f Mon Sep 17 00:00:00 2001 From: Jacob Baungard Hansen Date: Fri, 15 Nov 2024 15:42:42 +0100 Subject: [PATCH 2/3] Pass ImageV1Interface instead of Client for tests Instead of passing through the `ImageV1Client` we instead use the interface `ImageV1Interface`. This is so we're able to pass in a faked ImageClient for tests. This is needed because since reconciles, as per the previous commit, will actually fail if we cannot get the image from the imagestream. 
Signed-off-by: Jacob Baungard Hansen --- .../multiclusterobservability_controller.go | 2 +- ...lticlusterobservability_controller_test.go | 60 +++++++++++++++++-- .../pkg/rendering/renderer.go | 4 +- .../rendering/renderer_alertmanager_test.go | 30 +++++++++- .../pkg/rendering/renderer_test.go | 25 +++++++- 5 files changed, 112 insertions(+), 9 deletions(-) diff --git a/operators/multiclusterobservability/controllers/multiclusterobservability/multiclusterobservability_controller.go b/operators/multiclusterobservability/controllers/multiclusterobservability/multiclusterobservability_controller.go index 0707035ea..5df953d9e 100644 --- a/operators/multiclusterobservability/controllers/multiclusterobservability/multiclusterobservability_controller.go +++ b/operators/multiclusterobservability/controllers/multiclusterobservability/multiclusterobservability_controller.go @@ -91,7 +91,7 @@ type MultiClusterObservabilityReconciler struct { CRDMap map[string]bool APIReader client.Reader RESTMapper meta.RESTMapper - ImageClient *imagev1client.ImageV1Client + ImageClient imagev1client.ImageV1Interface } // +kubebuilder:rbac:groups=observability.open-cluster-management.io,resources=multiclusterobservabilities,verbs=get;list;watch;create;update;patch;delete diff --git a/operators/multiclusterobservability/controllers/multiclusterobservability/multiclusterobservability_controller_test.go b/operators/multiclusterobservability/controllers/multiclusterobservability/multiclusterobservability_controller_test.go index 3522595bf..dc7343a84 100644 --- a/operators/multiclusterobservability/controllers/multiclusterobservability/multiclusterobservability_controller_test.go +++ b/operators/multiclusterobservability/controllers/multiclusterobservability/multiclusterobservability_controller_test.go @@ -47,6 +47,10 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/log/zap" migrationv1alpha1 
"sigs.k8s.io/kube-storage-version-migrator/pkg/apis/migration/v1alpha1" + + imagev1 "github.com/openshift/api/image/v1" + fakeimageclient "github.com/openshift/client-go/image/clientset/versioned/fake" + fakeimagev1client "github.com/openshift/client-go/image/clientset/versioned/typed/image/v1/fake" ) func init() { @@ -350,8 +354,32 @@ func TestMultiClusterMonitoringCRUpdate(t *testing.T) { ). Build() + // Create fake imagestream client + imageClient := &fakeimagev1client.FakeImageV1{Fake: &(fakeimageclient.NewSimpleClientset().Fake)} + _, err := imageClient.ImageStreams(config.OauthProxyImageStreamNamespace).Create(context.Background(), + &imagev1.ImageStream{ + ObjectMeta: metav1.ObjectMeta{ + Name: config.OauthProxyImageStreamName, + Namespace: config.OauthProxyImageStreamNamespace, + }, + Spec: imagev1.ImageStreamSpec{ + Tags: []imagev1.TagReference{ + { + Name: "v4.4", + From: &corev1.ObjectReference{ + Kind: "DockerImage", + Name: "quay.io/openshift-release-dev/ocp-v4.0-art-dev", + }, + }, + }, + }, + }, metav1.CreateOptions{}) + if err != nil { + t.Fatal(err) + } + // Create a ReconcileMemcached object with the scheme and fake client. - r := &MultiClusterObservabilityReconciler{Client: cl, Scheme: s, CRDMap: map[string]bool{config.IngressControllerCRD: true}} + r := &MultiClusterObservabilityReconciler{Client: cl, Scheme: s, CRDMap: map[string]bool{config.IngressControllerCRD: true}, ImageClient: imageClient} config.SetMonitoringCRName(name) // Mock request to simulate Reconcile() being called on an event for a // watched resource . @@ -363,7 +391,7 @@ func TestMultiClusterMonitoringCRUpdate(t *testing.T) { // Create empty client. The test secret specified in MCO is not yet created. t.Log("Reconcile empty client") - _, err := r.Reconcile(context.TODO(), req) + _, err = r.Reconcile(context.TODO(), req) if err != nil { t.Fatalf("reconcile: (%v)", err) } @@ -759,8 +787,32 @@ func TestImageReplaceForMCO(t *testing.T) { // Create a fake client to mock API calls. 
cl := fake.NewClientBuilder().WithRuntimeObjects(objs...).Build() + // Create fake imagestream client + imageClient := &fakeimagev1client.FakeImageV1{Fake: &(fakeimageclient.NewSimpleClientset().Fake)} + _, err := imageClient.ImageStreams(config.OauthProxyImageStreamNamespace).Create(context.Background(), + &imagev1.ImageStream{ + ObjectMeta: metav1.ObjectMeta{ + Name: config.OauthProxyImageStreamName, + Namespace: config.OauthProxyImageStreamNamespace, + }, + Spec: imagev1.ImageStreamSpec{ + Tags: []imagev1.TagReference{ + { + Name: "v4.4", + From: &corev1.ObjectReference{ + Kind: "DockerImage", + Name: "quay.io/openshift-release-dev/ocp-v4.0-art-dev", + }, + }, + }, + }, + }, metav1.CreateOptions{}) + if err != nil { + t.Fatal(err) + } + // Create a ReconcileMemcached object with the scheme and fake client. - r := &MultiClusterObservabilityReconciler{Client: cl, Scheme: s, CRDMap: map[string]bool{config.MCHCrdName: true, config.IngressControllerCRD: true}} + r := &MultiClusterObservabilityReconciler{Client: cl, Scheme: s, CRDMap: map[string]bool{config.MCHCrdName: true, config.IngressControllerCRD: true}, ImageClient: imageClient} config.SetMonitoringCRName(name) // Mock request to simulate Reconcile() being called on an event for a watched resource . 
@@ -775,7 +827,7 @@ func TestImageReplaceForMCO(t *testing.T) { config.SetImageManifests(testImagemanifestsMap) // trigger another reconcile for MCH update event - _, err := r.Reconcile(context.TODO(), req) + _, err = r.Reconcile(context.TODO(), req) if err != nil { t.Fatalf("reconcile: (%v)", err) } diff --git a/operators/multiclusterobservability/pkg/rendering/renderer.go b/operators/multiclusterobservability/pkg/rendering/renderer.go index b61ce6026..ef3651d5d 100644 --- a/operators/multiclusterobservability/pkg/rendering/renderer.go +++ b/operators/multiclusterobservability/pkg/rendering/renderer.go @@ -29,7 +29,7 @@ type RendererOptions struct { type MCORenderer struct { kubeClient client.Client - imageClient *imagev1client.ImageV1Client + imageClient imagev1client.ImageV1Interface renderer *rendererutil.Renderer cr *obv1beta2.MultiClusterObservability rendererOptions *RendererOptions @@ -40,7 +40,7 @@ type MCORenderer struct { renderMCOAFns map[string]rendererutil.RenderFn } -func NewMCORenderer(multipleClusterMonitoring *obv1beta2.MultiClusterObservability, kubeClient client.Client, imageClient *imagev1client.ImageV1Client) *MCORenderer { +func NewMCORenderer(multipleClusterMonitoring *obv1beta2.MultiClusterObservability, kubeClient client.Client, imageClient imagev1client.ImageV1Interface) *MCORenderer { mcoRenderer := &MCORenderer{ renderer: rendererutil.NewRenderer(), cr: multipleClusterMonitoring, diff --git a/operators/multiclusterobservability/pkg/rendering/renderer_alertmanager_test.go b/operators/multiclusterobservability/pkg/rendering/renderer_alertmanager_test.go index db3819cac..eed473b35 100644 --- a/operators/multiclusterobservability/pkg/rendering/renderer_alertmanager_test.go +++ b/operators/multiclusterobservability/pkg/rendering/renderer_alertmanager_test.go @@ -5,6 +5,7 @@ package rendering import ( + "context" "fmt" "os" "path/filepath" @@ -12,6 +13,9 @@ import ( "strings" "testing" + imagev1 "github.com/openshift/api/image/v1" + 
fakeimageclient "github.com/openshift/client-go/image/clientset/versioned/fake" + fakeimagev1client "github.com/openshift/client-go/image/clientset/versioned/typed/image/v1/fake" mcoshared "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/api/shared" mcov1beta2 "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/api/v1beta2" "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/pkg/config" @@ -288,7 +292,31 @@ func renderTemplates(t *testing.T, kubeClient client.Client, mco *mcov1beta2.Mul defer os.Unsetenv(templatesutil.TemplatesPathEnvVar) config.ReadImageManifestConfigMap(kubeClient, "v1") - renderer := NewMCORenderer(mco, kubeClient, nil) + + imageClient := &fakeimagev1client.FakeImageV1{Fake: &(fakeimageclient.NewSimpleClientset().Fake)} + _, err = imageClient.ImageStreams(config.OauthProxyImageStreamNamespace).Create(context.Background(), + &imagev1.ImageStream{ + ObjectMeta: metav1.ObjectMeta{ + Name: config.OauthProxyImageStreamName, + Namespace: config.OauthProxyImageStreamNamespace, + }, + Spec: imagev1.ImageStreamSpec{ + Tags: []imagev1.TagReference{ + { + Name: "v4.4", + From: &corev1.ObjectReference{ + Kind: "DockerImage", + Name: "quay.io/openshift-release-dev/ocp-v4.0-art-dev", + }, + }, + }, + }, + }, metav1.CreateOptions{}) + if err != nil { + t.Fatal(err) + } + + renderer := NewMCORenderer(mco, kubeClient, imageClient) //load and render alertmanager templates alertTemplates, err := templates.GetOrLoadAlertManagerTemplates(templatesutil.GetTemplateRenderer()) diff --git a/operators/multiclusterobservability/pkg/rendering/renderer_test.go b/operators/multiclusterobservability/pkg/rendering/renderer_test.go index 313091832..49379b008 100644 --- a/operators/multiclusterobservability/pkg/rendering/renderer_test.go +++ b/operators/multiclusterobservability/pkg/rendering/renderer_test.go @@ -66,7 +66,30 @@ func TestRender(t 
*testing.T) { } kubeClient := fake.NewClientBuilder().WithObjects(clientCa).Build() - renderer := NewMCORenderer(mchcr, kubeClient, nil) + imageClient := &fakeimagev1client.FakeImageV1{Fake: &(fakeimageclient.NewSimpleClientset().Fake)} + _, err = imageClient.ImageStreams(config.OauthProxyImageStreamNamespace).Create(context.Background(), + &imagev1.ImageStream{ + ObjectMeta: metav1.ObjectMeta{ + Name: config.OauthProxyImageStreamName, + Namespace: config.OauthProxyImageStreamNamespace, + }, + Spec: imagev1.ImageStreamSpec{ + Tags: []imagev1.TagReference{ + { + Name: "v4.4", + From: &corev1.ObjectReference{ + Kind: "DockerImage", + Name: "quay.io/openshift-release-dev/ocp-v4.0-art-dev", + }, + }, + }, + }, + }, metav1.CreateOptions{}) + if err != nil { + t.Fatal(err) + } + + renderer := NewMCORenderer(mchcr, kubeClient, imageClient) _, err = renderer.Render() if err != nil { t.Fatalf("failed to render MultiClusterObservability: %v", err) From 748662897aea7c5f6b9fa10d2ff65f5fa48419d2 Mon Sep 17 00:00:00 2001 From: Jacob Baungard Hansen Date: Fri, 22 Nov 2024 12:15:32 +0100 Subject: [PATCH 3/3] Renderer: detect if imagestream is available Only try to get the oauth image from the OCP imagestream, if the image.openshift.io api type is available. Otherwise, it likely means we're running a non-ocp system (for kind tests for example) and in this case we accept using the base template image. 
Signed-off-by: Jacob Baungard Hansen --- .../pkg/rendering/renderer.go | 20 +++++++++++++++++++ .../pkg/rendering/renderer_alertmanager.go | 5 ++++- .../pkg/rendering/renderer_grafana.go | 7 +++++-- .../pkg/rendering/renderer_proxy.go | 7 +++++-- 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/operators/multiclusterobservability/pkg/rendering/renderer.go b/operators/multiclusterobservability/pkg/rendering/renderer.go index ef3651d5d..ba2b5ce63 100644 --- a/operators/multiclusterobservability/pkg/rendering/renderer.go +++ b/operators/multiclusterobservability/pkg/rendering/renderer.go @@ -10,6 +10,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/discovery" logf "sigs.k8s.io/controller-runtime/pkg/log" obv1beta2 "github.com/stolostron/multicluster-observability-operator/operators/multiclusterobservability/api/v1beta2" @@ -193,3 +194,22 @@ func (r *MCORenderer) MCOAResources(namespace string, labels map[string]string) return mcoaResources, nil } + +func (r *MCORenderer) HasImagestream() bool { + dcl := discovery.NewDiscoveryClient(r.imageClient.RESTClient()) + + apiList, err := dcl.ServerGroups() + if err != nil { + log.Error(err, "unable to get ServerGroups from imagestream detection") + return false + } + + apiGroups := apiList.Groups + for i := 0; i < len(apiGroups); i++ { + if apiGroups[i].Name == "image.openshift.io" { + return true + } + } + + return false +} diff --git a/operators/multiclusterobservability/pkg/rendering/renderer_alertmanager.go b/operators/multiclusterobservability/pkg/rendering/renderer_alertmanager.go index 978feb25a..78a8e7ebf 100644 --- a/operators/multiclusterobservability/pkg/rendering/renderer_alertmanager.go +++ b/operators/multiclusterobservability/pkg/rendering/renderer_alertmanager.go @@ -123,10 +123,13 @@ func (r *MCORenderer) renderAlertManagerStatefulSet(res *resource.Resource, name configReloaderContainer.Image = 
image } + // If we're on OCP and has imagestreams, we always want the oauth image + // from the imagestream, and fail the reconcile if we don't find it. + // If we're on non-OCP (tests) we use the base template image found, image = mcoconfig.GetOauthProxyImage(r.imageClient) if found { oauthProxyContainer.Image = image - } else { + } else if r.HasImagestream() { return nil, fmt.Errorf("failed to get OAuth image for alertmanager") } oauthProxyContainer.ImagePullPolicy = imagePullPolicy diff --git a/operators/multiclusterobservability/pkg/rendering/renderer_grafana.go b/operators/multiclusterobservability/pkg/rendering/renderer_grafana.go index 49ef1de51..9e4a2d038 100644 --- a/operators/multiclusterobservability/pkg/rendering/renderer_grafana.go +++ b/operators/multiclusterobservability/pkg/rendering/renderer_grafana.go @@ -70,11 +70,14 @@ func (r *MCORenderer) renderGrafanaDeployments(res *resource.Resource, } spec.Containers[1].ImagePullPolicy = imagePullPolicy + // If we're on OCP and has imagestreams, we always want the oauth image + // from the imagestream, and fail the reconcile if we don't find it. 
+ // If we're on non-OCP (tests) we use the base template image found, image = config.GetOauthProxyImage(r.imageClient) if found { spec.Containers[2].Image = image - } else { - return nil, fmt.Errorf("failed to get OAuth image for Grafana") + } else if r.HasImagestream() { + return nil, fmt.Errorf("failed to get OAuth image for Grafana") } spec.Containers[2].ImagePullPolicy = imagePullPolicy diff --git a/operators/multiclusterobservability/pkg/rendering/renderer_proxy.go b/operators/multiclusterobservability/pkg/rendering/renderer_proxy.go index d0cff6f17..8a9b711ce 100644 --- a/operators/multiclusterobservability/pkg/rendering/renderer_proxy.go +++ b/operators/multiclusterobservability/pkg/rendering/renderer_proxy.go @@ -94,11 +94,14 @@ func (r *MCORenderer) renderProxyDeployment(res *resource.Resource, spec.Containers[0].Image = image } + // If we're on OCP and has imagestreams, we always want the oauth image + // from the imagestream, and fail the reconcile if we don't find it. + // If we're on non-OCP (tests) we use the base template image found, image = mcoconfig.GetOauthProxyImage(r.imageClient) if found { spec.Containers[1].Image = image - } else { - return nil, fmt.Errorf("failed to get OAuth image for rbacqueryproxy") + } else if r.HasImagestream() { + return nil, fmt.Errorf("failed to get OAuth image for rbacqueryproxy") } for idx := range spec.Volumes {