Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add AlertManager support #150

Merged
merged 1 commit into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ require (
k8s.io/apiserver v0.30.2
k8s.io/client-go v0.30.2
k8s.io/klog/v2 v2.130.1
k8s.io/kube-aggregator v0.30.2
k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0
kmodules.xyz/authorizer v0.29.1
kmodules.xyz/client-go v0.30.42
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,8 @@ k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/kms v0.30.2 h1:VSZILO/tkzrz5Tu2j+yFQZ2Dc5JerQZX2GqhFJbQrfw=
k8s.io/kms v0.30.2/go.mod h1:GrMurD0qk3G4yNgGcsCEmepqf9KyyIrTXYR2lyUOJC4=
k8s.io/kube-aggregator v0.30.2 h1:0+yk/ED6foCprY8VmkDPUhngjaAPKsNTXB/UrtvbIz0=
k8s.io/kube-aggregator v0.30.2/go.mod h1:EhqCfDdxysNWXk1wRL9SEHAdo1DKl6EULQagztkBcXE=
k8s.io/kube-openapi v0.0.0-20240620174524-b456828f718b h1:Q9xmGWBvOGd8UJyccgpYlLosk/JlfP3xQLNkQlHJeXw=
k8s.io/kube-openapi v0.0.0-20240620174524-b456828f718b/go.mod h1:UxDHUPsUwTOOxSU+oXURfFBcAS6JwiRXTYqYwfuGowc=
k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0 h1:jgGTlFYnhF1PM1Ax/lAlxUPE+KfCIXHaathvJg1C3ak=
Expand Down
38 changes: 29 additions & 9 deletions pkg/apiserver/apiserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,17 @@ import (
"go.openviz.dev/apimachinery/apis/ui"
uiinstall "go.openviz.dev/apimachinery/apis/ui/install"
uiapi "go.openviz.dev/apimachinery/apis/ui/v1alpha1"
alertmanagercontroller "go.openviz.dev/grafana-tools/pkg/controllers/alertmanager"
namespacecontroller "go.openviz.dev/grafana-tools/pkg/controllers/namespace"
promtehsucontroller "go.openviz.dev/grafana-tools/pkg/controllers/prometheus"
prometheuscontroller "go.openviz.dev/grafana-tools/pkg/controllers/prometheus"
"go.openviz.dev/grafana-tools/pkg/controllers/ranchertoken"
servicemonitorcontroller "go.openviz.dev/grafana-tools/pkg/controllers/servicemonitor"
"go.openviz.dev/grafana-tools/pkg/detector"
dashgroupstorage "go.openviz.dev/grafana-tools/pkg/registry/ui/dashboardgroup"

"github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
monitoringv1beta1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1beta1"
core "k8s.io/api/core/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand All @@ -49,6 +51,7 @@ import (
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
restclient "k8s.io/client-go/rest"
"k8s.io/klog/v2"
apiregistrationv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1"
"kmodules.xyz/authorizer"
"kmodules.xyz/client-go/apiextensions"
clustermeta "kmodules.xyz/client-go/cluster"
Expand All @@ -74,7 +77,9 @@ func init() {
uiinstall.Install(Scheme)
openvizinstall.Install(Scheme)
utilruntime.Must(clientgoscheme.AddToScheme(Scheme))
utilruntime.Must(apiregistrationv1.AddToScheme(Scheme))
utilruntime.Must(monitoringv1.AddToScheme(Scheme))
utilruntime.Must(monitoringv1beta1.AddToScheme(Scheme))
utilruntime.Must(appcatalogapi.AddToScheme(Scheme))
utilruntime.Must(chartsapi.AddToScheme(Scheme))
utilruntime.Must(apiextensionsv1.AddToScheme(Scheme))
Expand Down Expand Up @@ -174,9 +179,9 @@ func (c completedConfig) New(ctx context.Context) (*UIServer, error) {
return nil, err
}

var bc *promtehsucontroller.Client
var bc *prometheuscontroller.Client
if c.ExtraConfig.BaseURL != "" && c.ExtraConfig.Token != "" {
bc, err = promtehsucontroller.NewClient(c.ExtraConfig.BaseURL, c.ExtraConfig.Token, c.ExtraConfig.CACert)
bc, err = prometheuscontroller.NewClient(c.ExtraConfig.BaseURL, c.ExtraConfig.Token, c.ExtraConfig.CACert)
if err != nil {
return nil, err
}
Expand All @@ -187,7 +192,8 @@ func (c completedConfig) New(ctx context.Context) (*UIServer, error) {
os.Exit(1)
}

d := detector.New(mgr.GetClient())
promDetector := detector.NewPrometheusDetector(mgr.GetClient())
amgrDetector := detector.NewAlertmanagerDetector(mgr.GetClient())

if c.ExtraConfig.RancherAuthSecret != "" {
if err = ranchertoken.NewTokenRefresher(
Expand All @@ -208,25 +214,39 @@ func (c completedConfig) New(ctx context.Context) (*UIServer, error) {
bc,
cid,
c.ExtraConfig.HubUID,
d,
promDetector,
).SetupWithManager(mgr); err != nil {
klog.Error(err, "unable to create controller", "controller", "ClientOrg")
os.Exit(1)
}

if err = promtehsucontroller.NewReconciler(
if err = prometheuscontroller.NewReconciler(
mgr.GetClient(),
bc,
cid,
c.ExtraConfig.HubUID,
c.ExtraConfig.RancherAuthSecret,
d,
promDetector,
).SetupWithManager(mgr); err != nil {
klog.Error(err, "unable to create controller", "controller", "Prometheus")
os.Exit(1)
}
})

apiextensions.RegisterSetup(schema.GroupKind{
Group: monitoring.GroupName,
Kind: monitoringv1.AlertmanagersKind,
}, func(ctx context.Context, mgr ctrl.Manager) {
if err = alertmanagercontroller.NewReconciler(
mgr.GetClient(),
cid,
amgrDetector,
).SetupWithManager(mgr); err != nil {
klog.Error(err, "unable to create controller", "controller", "Alertmanagers")
os.Exit(1)
}
})

apiextensions.RegisterSetup(schema.GroupKind{
Group: monitoring.GroupName,
Kind: monitoringv1.ServiceMonitorsKind,
Expand All @@ -242,7 +262,7 @@ func (c completedConfig) New(ctx context.Context) (*UIServer, error) {
if err = servicemonitorcontroller.NewFederationReconciler(
c.ExtraConfig.ClientConfig,
mgr.GetClient(),
d,
promDetector,
).SetupWithManager(mgr); err != nil {
klog.Error(err, "unable to create controller", " federation controller", "ServiceMonitor")
os.Exit(1)
Expand Down Expand Up @@ -302,7 +322,7 @@ func (c completedConfig) New(ctx context.Context) (*UIServer, error) {
apiGroupInfo := genericapiserver.NewDefaultAPIGroupInfo(ui.GroupName, Scheme, metav1.ParameterCodec, Codecs)

v1alpha1storage := map[string]rest.Storage{}
v1alpha1storage[uiapi.ResourceDashboardGroups] = dashgroupstorage.NewStorage(ctrlClient, rbacAuthorizer, d)
v1alpha1storage[uiapi.ResourceDashboardGroups] = dashgroupstorage.NewStorage(ctrlClient, rbacAuthorizer, promDetector)
apiGroupInfo.VersionedResourcesStorageMap["v1alpha1"] = v1alpha1storage

if err := s.GenericAPIServer.InstallAPIGroup(&apiGroupInfo); err != nil {
Expand Down
199 changes: 199 additions & 0 deletions pkg/controllers/alertmanager/alertmanager_controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
/*
Copyright AppsCode Inc. and Contributors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package alertmanager

import (
"context"
"fmt"

"go.openviz.dev/grafana-tools/pkg/detector"

monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
monitoringv1beta1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/klog/v2"
apiregistrationv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1"
"k8s.io/utils/ptr"
cu "kmodules.xyz/client-go/client"
meta_util "kmodules.xyz/client-go/meta"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

// v1alpha1.inbox.monitoring.appscode.com monitoring/inbox-agent
// Grant permission to alertmanager to call webhook

// Fixed identifiers used by the Alertmanager controller.
const (
	// inboxAPIServiceGroup is the API group an APIService must serve for it
	// to be treated as the inbox API service.
	inboxAPIServiceGroup = "inbox.monitoring.appscode.com"

	// amcfgInboxAgent is the name of the AlertmanagerConfig that this
	// controller creates or patches in each Alertmanager's namespace.
	amcfgInboxAgent = "inbox-agent"
)

var (
	// selfNamespace caches the result of meta_util.PodNamespace() at package
	// initialization; presumably the namespace this process runs in
	// (TODO confirm). It is not referenced elsewhere in this file.
	selfNamespace = meta_util.PodNamespace()

	// defaultPresetsLabels is a label set marking a default chart preset.
	// It is not referenced elsewhere in this file.
	defaultPresetsLabels = map[string]string{
		"charts.x-helm.dev/is-default-preset": "true",
	}
)

// AlertmanagerReconciler reconciles an Alertmanager object
type AlertmanagerReconciler struct {
	// kc is the client used to read Alertmanagers and APIServices and to
	// create or patch the AlertmanagerConfig.
	kc client.Client
	// scheme is taken from kc.Scheme() when the reconciler is constructed.
	scheme *runtime.Scheme
	// clusterUID is stored by the constructor; it is not read by any method
	// in this file.
	clusterUID string
	// d is consulted via Ready() before any reconciliation work is done.
	d detector.AlertmanagerDetector
}

// NewReconciler returns an AlertmanagerReconciler that uses kc for all API
// access, remembers clusterUID, and gates reconciliation on d. The scheme is
// taken from kc.
func NewReconciler(kc client.Client, clusterUID string, d detector.AlertmanagerDetector) *AlertmanagerReconciler {
	r := new(AlertmanagerReconciler)
	r.kc = kc
	r.scheme = kc.Scheme()
	r.clusterUID = clusterUID
	r.d = d
	return r
}

// Reconcile is part of the main kubernetes reconciliation loop. For the
// Alertmanager named in req it:
//
//  1. fetches the object, returning without error if it no longer exists;
//  2. stops if the detector reports not ready (returning the detector's error,
//     if any);
//  3. stops if the Alertmanager is being deleted;
//  4. looks up the inbox APIService and stops if none is registered;
//  5. creates or patches the inbox-agent AlertmanagerConfig in the
//     Alertmanager's namespace.
//
// For more details on Result, see:
// - https://pkg.go.dev/sigs.k8s.io/controller-runtime/pkg/reconcile
func (r *AlertmanagerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	// Named logger rather than log so the imported log package is not shadowed.
	logger := log.FromContext(ctx)

	var am monitoringv1.Alertmanager
	if err := r.kc.Get(ctx, req.NamespacedName, &am); err != nil {
		// Not-found errors are ignored: they can't be fixed by an immediate
		// requeue (we'll need to wait for a new notification), and we get
		// them on deleted requests. Only log errors that are actually
		// returned, so deletions do not produce spurious error logs.
		err = client.IgnoreNotFound(err)
		if err != nil {
			logger.Error(err, "unable to fetch Alertmanager")
		}
		return ctrl.Result{}, err
	}

	if ready, err := r.d.Ready(); !ready {
		return ctrl.Result{}, err
	}

	// Nothing to set up for an Alertmanager that is being deleted.
	if am.DeletionTimestamp != nil {
		return ctrl.Result{}, nil
	}

	// A missing inbox APIService is not an error; the APIService watch
	// registered in SetupWithManager enqueues the Alertmanagers again when a
	// matching APIService event arrives.
	apisvc, err := r.GetInboxAPIService(ctx)
	if err != nil || apisvc == nil {
		return ctrl.Result{}, err
	}

	if err := r.SetupClusterForAlertmanager(ctx, &am, apisvc); err != nil {
		logger.Error(err, "unable to setup Alertmanager config")
		return ctrl.Result{}, err
	}

	return ctrl.Result{}, nil
}

// SetupClusterForAlertmanager creates or patches the AlertmanagerConfig named
// "inbox-agent" in am's namespace so that alerts are routed to a webhook
// receiver served by the Service behind apisvc.
//
// The webhook URL is https://<service-name>.<service-namespace>.svc:443/alerts,
// built from apisvc.Spec.Service. TLS verification is skipped for the webhook
// call.
//
// It returns an error if apisvc is nil or carries no service reference (an
// APIService without one is served locally and has no webhook endpoint), or
// if the create/patch call fails.
func (r *AlertmanagerReconciler) SetupClusterForAlertmanager(ctx context.Context, am *monitoringv1.Alertmanager, apisvc *apiregistrationv1.APIService) error {
	// Spec.Service is a pointer and is nil for locally served API groups;
	// guard it so the mutate function below cannot dereference nil.
	if apisvc == nil || apisvc.Spec.Service == nil {
		return fmt.Errorf("inbox APIService for group %q has no service reference", inboxAPIServiceGroup)
	}
	webhookURL := fmt.Sprintf("https://%s.%s.svc:443/alerts", apisvc.Spec.Service.Name, apisvc.Spec.Service.Namespace)

	cr := monitoringv1beta1.AlertmanagerConfig{
		ObjectMeta: metav1.ObjectMeta{
			Name:      amcfgInboxAgent,
			Namespace: am.Namespace,
		},
	}
	crvt, err := cu.CreateOrPatch(ctx, r.kc, &cr, func(in client.Object, createOp bool) client.Object {
		obj := in.(*monitoringv1beta1.AlertmanagerConfig)

		// Receivers and Route are overwritten on every reconcile.
		obj.Spec.Receivers = []monitoringv1beta1.Receiver{
			{
				Name: "webhook",
				WebhookConfigs: []monitoringv1beta1.WebhookConfig{
					{
						SendResolved: ptr.To(true),
						URL:          ptr.To(webhookURL),
						HTTPConfig: &monitoringv1beta1.HTTPConfig{
							TLSConfig: &monitoringv1.SafeTLSConfig{
								InsecureSkipVerify: ptr.To(true),
							},
						},
						MaxAlerts: 0,
					},
				},
			},
		}

		obj.Spec.Route = &monitoringv1beta1.Route{
			GroupBy:        []string{"job"},
			GroupWait:      "10s",
			GroupInterval:  "1m",
			Receiver:       "webhook",
			RepeatInterval: "1h",
		}

		return obj
	})
	if err != nil {
		return err
	}
	klog.Infof("%s AlertmanagerConfig %s", crvt, cr.Name)

	return nil
}

// GetInboxAPIService lists all APIServices and returns a copy of the first
// one whose Spec.Group equals inboxAPIServiceGroup. It returns (nil, nil)
// when no APIService matches, and (nil, err) when the list call fails.
func (r *AlertmanagerReconciler) GetInboxAPIService(ctx context.Context) (*apiregistrationv1.APIService, error) {
	var services apiregistrationv1.APIServiceList
	if err := r.kc.List(ctx, &services); err != nil {
		return nil, err
	}

	for i := range services.Items {
		if services.Items[i].Spec.Group != inboxAPIServiceGroup {
			continue
		}
		// Return a pointer to a copy, not into the list's backing array.
		match := services.Items[i]
		return &match, nil
	}
	return nil, nil
}

// SetupWithManager sets up the controller with the Manager.
//
// The controller reconciles Alertmanager objects directly. It also watches
// APIServices whose Spec.Group is inboxAPIServiceGroup; any event on such an
// APIService enqueues a request for every Alertmanager returned by a list
// call.
func (r *AlertmanagerReconciler) SetupWithManager(mgr ctrl.Manager) error {
	// enqueueAll maps an APIService event to one request per Alertmanager.
	enqueueAll := handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, _ client.Object) []reconcile.Request {
		var amList monitoringv1.AlertmanagerList
		if err := r.kc.List(ctx, &amList); err != nil {
			// A map function cannot return an error; log it so a failed
			// fan-out is not silently lost.
			log.FromContext(ctx).Error(err, "unable to list Alertmanagers for APIService event")
			return nil
		}

		reqs := make([]reconcile.Request, 0, len(amList.Items))
		for i := range amList.Items {
			reqs = append(reqs, reconcile.Request{
				NamespacedName: client.ObjectKeyFromObject(&amList.Items[i]),
			})
		}
		return reqs
	})

	// isInboxAPIService filters the watch down to the inbox API group. The
	// checked assertion rejects unexpected object types instead of panicking.
	isInboxAPIService := predicate.NewPredicateFuncs(func(obj client.Object) bool {
		apisvc, ok := obj.(*apiregistrationv1.APIService)
		return ok && apisvc.Spec.Group == inboxAPIServiceGroup
	})

	return ctrl.NewControllerManagedBy(mgr).
		For(&monitoringv1.Alertmanager{}).
		Watches(&apiregistrationv1.APIService{}, enqueueAll, builder.WithPredicates(isInboxAPIService)).
		Complete(r)
}
4 changes: 2 additions & 2 deletions pkg/controllers/namespace/namespace_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@ type ClientOrgReconciler struct {
bc *prometheus.Client
clusterUID string
hubUID string
d detector.Detector
d detector.PrometheusDetector
}

func NewReconciler(kc client.Client, bc *prometheus.Client, clusterUID, hubUID string, d detector.Detector) *ClientOrgReconciler {
func NewReconciler(kc client.Client, bc *prometheus.Client, clusterUID, hubUID string, d detector.PrometheusDetector) *ClientOrgReconciler {
return &ClientOrgReconciler{
kc: kc,
scheme: kc.Scheme(),
Expand Down
4 changes: 2 additions & 2 deletions pkg/controllers/prometheus/prometheus_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,10 @@ type PrometheusReconciler struct {
clusterUID string
hubUID string
rancherAuthSecretName string
d detector.Detector
d detector.PrometheusDetector
}

func NewReconciler(kc client.Client, bc *Client, clusterUID, hubUID, rancherAuthSecretName string, d detector.Detector) *PrometheusReconciler {
func NewReconciler(kc client.Client, bc *Client, clusterUID, hubUID, rancherAuthSecretName string, d detector.PrometheusDetector) *PrometheusReconciler {
return &PrometheusReconciler{
kc: kc,
scheme: kc.Scheme(),
Expand Down
4 changes: 2 additions & 2 deletions pkg/controllers/servicemonitor/federation_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ type FederationReconciler struct {
cfg *rest.Config
kc client.Client
scheme *runtime.Scheme
d detector.Detector
d detector.PrometheusDetector
}

func NewFederationReconciler(cfg *rest.Config, kc client.Client, d detector.Detector) *FederationReconciler {
func NewFederationReconciler(cfg *rest.Config, kc client.Client, d detector.PrometheusDetector) *FederationReconciler {
return &FederationReconciler{
cfg: cfg,
kc: kc,
Expand Down
Loading
Loading