Skip to content

Commit

Permalink
Add AlertManager support
Browse files Browse the repository at this point in the history
Signed-off-by: Tamal Saha <[email protected]>
  • Loading branch information
tamalsaha committed Dec 16, 2024
1 parent 528f002 commit a1ef967
Show file tree
Hide file tree
Showing 41 changed files with 13,693 additions and 43 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ require (
k8s.io/apiserver v0.30.2
k8s.io/client-go v0.30.2
k8s.io/klog/v2 v2.130.1
k8s.io/kube-aggregator v0.30.2
k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0
kmodules.xyz/authorizer v0.29.1
kmodules.xyz/client-go v0.30.42
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,8 @@ k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/kms v0.30.2 h1:VSZILO/tkzrz5Tu2j+yFQZ2Dc5JerQZX2GqhFJbQrfw=
k8s.io/kms v0.30.2/go.mod h1:GrMurD0qk3G4yNgGcsCEmepqf9KyyIrTXYR2lyUOJC4=
k8s.io/kube-aggregator v0.30.2 h1:0+yk/ED6foCprY8VmkDPUhngjaAPKsNTXB/UrtvbIz0=
k8s.io/kube-aggregator v0.30.2/go.mod h1:EhqCfDdxysNWXk1wRL9SEHAdo1DKl6EULQagztkBcXE=
k8s.io/kube-openapi v0.0.0-20240620174524-b456828f718b h1:Q9xmGWBvOGd8UJyccgpYlLosk/JlfP3xQLNkQlHJeXw=
k8s.io/kube-openapi v0.0.0-20240620174524-b456828f718b/go.mod h1:UxDHUPsUwTOOxSU+oXURfFBcAS6JwiRXTYqYwfuGowc=
k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0 h1:jgGTlFYnhF1PM1Ax/lAlxUPE+KfCIXHaathvJg1C3ak=
Expand Down
38 changes: 29 additions & 9 deletions pkg/apiserver/apiserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,17 @@ import (
"go.openviz.dev/apimachinery/apis/ui"
uiinstall "go.openviz.dev/apimachinery/apis/ui/install"
uiapi "go.openviz.dev/apimachinery/apis/ui/v1alpha1"
alertmanagercontroller "go.openviz.dev/grafana-tools/pkg/controllers/alertmanager"
namespacecontroller "go.openviz.dev/grafana-tools/pkg/controllers/namespace"
promtehsucontroller "go.openviz.dev/grafana-tools/pkg/controllers/prometheus"
prometheuscontroller "go.openviz.dev/grafana-tools/pkg/controllers/prometheus"
"go.openviz.dev/grafana-tools/pkg/controllers/ranchertoken"
servicemonitorcontroller "go.openviz.dev/grafana-tools/pkg/controllers/servicemonitor"
"go.openviz.dev/grafana-tools/pkg/detector"
dashgroupstorage "go.openviz.dev/grafana-tools/pkg/registry/ui/dashboardgroup"

"github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
monitoringv1beta1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1beta1"
core "k8s.io/api/core/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand All @@ -49,6 +51,7 @@ import (
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
restclient "k8s.io/client-go/rest"
"k8s.io/klog/v2"
apiregistrationv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1"
"kmodules.xyz/authorizer"
"kmodules.xyz/client-go/apiextensions"
clustermeta "kmodules.xyz/client-go/cluster"
Expand All @@ -74,7 +77,9 @@ func init() {
uiinstall.Install(Scheme)
openvizinstall.Install(Scheme)
utilruntime.Must(clientgoscheme.AddToScheme(Scheme))
utilruntime.Must(apiregistrationv1.AddToScheme(Scheme))
utilruntime.Must(monitoringv1.AddToScheme(Scheme))
utilruntime.Must(monitoringv1beta1.AddToScheme(Scheme))
utilruntime.Must(appcatalogapi.AddToScheme(Scheme))
utilruntime.Must(chartsapi.AddToScheme(Scheme))
utilruntime.Must(apiextensionsv1.AddToScheme(Scheme))
Expand Down Expand Up @@ -174,9 +179,9 @@ func (c completedConfig) New(ctx context.Context) (*UIServer, error) {
return nil, err
}

var bc *promtehsucontroller.Client
var bc *prometheuscontroller.Client
if c.ExtraConfig.BaseURL != "" && c.ExtraConfig.Token != "" {
bc, err = promtehsucontroller.NewClient(c.ExtraConfig.BaseURL, c.ExtraConfig.Token, c.ExtraConfig.CACert)
bc, err = prometheuscontroller.NewClient(c.ExtraConfig.BaseURL, c.ExtraConfig.Token, c.ExtraConfig.CACert)
if err != nil {
return nil, err
}
Expand All @@ -187,7 +192,8 @@ func (c completedConfig) New(ctx context.Context) (*UIServer, error) {
os.Exit(1)
}

d := detector.New(mgr.GetClient())
promDetector := detector.NewPrometheusDetector(mgr.GetClient())
amgrDetector := detector.NewAlertmanagerDetector(mgr.GetClient())

if c.ExtraConfig.RancherAuthSecret != "" {
if err = ranchertoken.NewTokenRefresher(
Expand All @@ -208,25 +214,39 @@ func (c completedConfig) New(ctx context.Context) (*UIServer, error) {
bc,
cid,
c.ExtraConfig.HubUID,
d,
promDetector,
).SetupWithManager(mgr); err != nil {
klog.Error(err, "unable to create controller", "controller", "ClientOrg")
os.Exit(1)
}

if err = promtehsucontroller.NewReconciler(
if err = prometheuscontroller.NewReconciler(
mgr.GetClient(),
bc,
cid,
c.ExtraConfig.HubUID,
c.ExtraConfig.RancherAuthSecret,
d,
promDetector,
).SetupWithManager(mgr); err != nil {
klog.Error(err, "unable to create controller", "controller", "Prometheus")
os.Exit(1)
}
})

apiextensions.RegisterSetup(schema.GroupKind{
Group: monitoring.GroupName,
Kind: monitoringv1.AlertmanagersKind,
}, func(ctx context.Context, mgr ctrl.Manager) {
if err = alertmanagercontroller.NewReconciler(
mgr.GetClient(),
cid,
amgrDetector,
).SetupWithManager(mgr); err != nil {
klog.Error(err, "unable to create controller", "controller", "Alertmanagers")
os.Exit(1)
}
})

apiextensions.RegisterSetup(schema.GroupKind{
Group: monitoring.GroupName,
Kind: monitoringv1.ServiceMonitorsKind,
Expand All @@ -242,7 +262,7 @@ func (c completedConfig) New(ctx context.Context) (*UIServer, error) {
if err = servicemonitorcontroller.NewFederationReconciler(
c.ExtraConfig.ClientConfig,
mgr.GetClient(),
d,
promDetector,
).SetupWithManager(mgr); err != nil {
klog.Error(err, "unable to create controller", " federation controller", "ServiceMonitor")
os.Exit(1)
Expand Down Expand Up @@ -302,7 +322,7 @@ func (c completedConfig) New(ctx context.Context) (*UIServer, error) {
apiGroupInfo := genericapiserver.NewDefaultAPIGroupInfo(ui.GroupName, Scheme, metav1.ParameterCodec, Codecs)

v1alpha1storage := map[string]rest.Storage{}
v1alpha1storage[uiapi.ResourceDashboardGroups] = dashgroupstorage.NewStorage(ctrlClient, rbacAuthorizer, d)
v1alpha1storage[uiapi.ResourceDashboardGroups] = dashgroupstorage.NewStorage(ctrlClient, rbacAuthorizer, promDetector)
apiGroupInfo.VersionedResourcesStorageMap["v1alpha1"] = v1alpha1storage

if err := s.GenericAPIServer.InstallAPIGroup(&apiGroupInfo); err != nil {
Expand Down
199 changes: 199 additions & 0 deletions pkg/controllers/alertmanager/alertmanager_controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
/*
Copyright AppsCode Inc. and Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package alertmanager

import (
"context"
"fmt"

"go.openviz.dev/grafana-tools/pkg/detector"

monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
monitoringv1beta1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/klog/v2"
apiregistrationv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1"
"k8s.io/utils/ptr"
cu "kmodules.xyz/client-go/client"
meta_util "kmodules.xyz/client-go/meta"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

// v1alpha1.inbox.monitoring.appscode.com monitoring/inbox-agent
// Grant permission to alertmanager to call webhook

const (
	// inboxAPIServiceGroup is the API group compared against APIService.Spec.Group
	// to locate (and watch) the inbox agent's APIService.
	inboxAPIServiceGroup = "inbox.monitoring.appscode.com"
	// amcfgInboxAgent is the name of the AlertmanagerConfig object that
	// SetupClusterForAlertmanager creates or patches.
	amcfgInboxAgent = "inbox-agent"
)

// selfNamespace holds the result of meta_util.PodNamespace(), evaluated once
// at package initialization. It is not referenced elsewhere in this file.
// NOTE(review): presumably the namespace this process's pod runs in — confirm
// against kmodules.xyz/client-go/meta.
var selfNamespace = meta_util.PodNamespace()

// defaultPresetsLabels is a label set containing only
// charts.x-helm.dev/is-default-preset=true. It is not referenced elsewhere in
// this file.
// NOTE(review): presumably a selector for default chart presets — confirm
// against the other files in this package.
var defaultPresetsLabels = map[string]string{
	"charts.x-helm.dev/is-default-preset": "true",
}

// AlertmanagerReconciler reconciles an Alertmanager object by making sure an
// AlertmanagerConfig pointing at the inbox agent webhook exists in the
// Alertmanager's namespace.
type AlertmanagerReconciler struct {
	// kc is the client used for all reads and writes against the cluster.
	kc client.Client
	// scheme is the scheme of kc, captured in NewReconciler.
	scheme *runtime.Scheme
	// clusterUID is stored by NewReconciler; it is not read by any method in this file.
	clusterUID string
	// d gates reconciliation: Reconcile does nothing until d.Ready() reports true.
	d detector.AlertmanagerDetector
}

// NewReconciler builds an AlertmanagerReconciler that talks to the cluster
// through kc (whose scheme is reused), remembers clusterUID, and consults d
// before doing any work.
func NewReconciler(kc client.Client, clusterUID string, d detector.AlertmanagerDetector) *AlertmanagerReconciler {
	r := new(AlertmanagerReconciler)
	r.kc = kc
	r.scheme = kc.Scheme()
	r.clusterUID = clusterUID
	r.d = d
	return r
}

// Reconcile makes sure the namespace of the requested Alertmanager contains
// the inbox-agent AlertmanagerConfig.
//
// The steps are:
//  1. Fetch the Alertmanager; a not-found result (the object was deleted) is
//     treated as success and is not logged as an error.
//  2. Stop if the detector is not ready, returning whatever error it reported.
//  3. Stop if the Alertmanager is being deleted.
//  4. Look up the inbox APIService; if none exists yet, stop without error.
//     SetupWithManager watches that APIService and re-enqueues every
//     Alertmanager when it changes.
//  5. Create or patch the AlertmanagerConfig via SetupClusterForAlertmanager.
//
// No explicit requeue is ever requested; a non-nil error is returned so that
// controller-runtime retries with its usual backoff.
func (r *AlertmanagerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	logger := log.FromContext(ctx)

	var am monitoringv1.Alertmanager
	if err := r.kc.Get(ctx, req.NamespacedName, &am); err != nil {
		// Not-found errors can't be fixed by an immediate requeue (we need to
		// wait for a new notification) and are expected for deleted objects,
		// so they are neither logged as errors nor returned.
		if client.IgnoreNotFound(err) == nil {
			return ctrl.Result{}, nil
		}
		logger.Error(err, "unable to fetch Alertmanager")
		return ctrl.Result{}, err
	}

	if ready, err := r.d.Ready(); !ready {
		return ctrl.Result{}, err
	}

	if am.DeletionTimestamp != nil {
		return ctrl.Result{}, nil
	}

	apisvc, err := r.GetInboxAPIService(ctx)
	if err != nil || apisvc == nil {
		// err is nil when the APIService simply does not exist yet.
		return ctrl.Result{}, err
	}

	if err := r.SetupClusterForAlertmanager(ctx, &am, apisvc); err != nil {
		logger.Error(err, "unable to setup Alertmanager config")
		return ctrl.Result{}, err
	}

	return ctrl.Result{}, nil
}

// SetupClusterForAlertmanager creates or patches the AlertmanagerConfig named
// amcfgInboxAgent in the namespace of am so that alerts are routed to a
// webhook receiver served by the service behind apisvc.
//
// The receiver posts to https://<service>.<namespace>.svc:443/alerts with TLS
// verification disabled and resolved notifications enabled. The route groups
// alerts by "job".
//
// It returns an error if apisvc is nil or has no backing service (as is the
// case for locally served APIServices), or if the create/patch call fails.
func (r *AlertmanagerReconciler) SetupClusterForAlertmanager(ctx context.Context, am *monitoringv1.Alertmanager, apisvc *apiregistrationv1.APIService) error {
	// Spec.Service is a pointer and is nil for APIServices handled locally by
	// the API server; guard it instead of panicking while building the URL.
	if apisvc == nil || apisvc.Spec.Service == nil {
		return fmt.Errorf("APIService for group %s has no backing service", inboxAPIServiceGroup)
	}
	webhookURL := fmt.Sprintf("https://%s.%s.svc:443/alerts", apisvc.Spec.Service.Name, apisvc.Spec.Service.Namespace)

	cr := monitoringv1beta1.AlertmanagerConfig{
		ObjectMeta: metav1.ObjectMeta{
			Name:      amcfgInboxAgent,
			Namespace: am.Namespace,
		},
	}
	crvt, err := cu.CreateOrPatch(ctx, r.kc, &cr, func(in client.Object, createOp bool) client.Object {
		obj := in.(*monitoringv1beta1.AlertmanagerConfig)

		obj.Spec.Receivers = []monitoringv1beta1.Receiver{
			{
				Name: "webhook",
				WebhookConfigs: []monitoringv1beta1.WebhookConfig{
					{
						SendResolved: ptr.To(true),
						URL:          ptr.To(webhookURL),
						HTTPConfig: &monitoringv1beta1.HTTPConfig{
							TLSConfig: &monitoringv1.SafeTLSConfig{
								InsecureSkipVerify: ptr.To(true),
							},
						},
						MaxAlerts: 0,
					},
				},
			},
		}

		obj.Spec.Route = &monitoringv1beta1.Route{
			GroupBy:        []string{"job"},
			GroupWait:      "10s",
			GroupInterval:  "1m",
			Receiver:       "webhook",
			RepeatInterval: "1h",
		}

		return obj
	})
	if err != nil {
		return err
	}
	klog.Infof("%s AlertmanagerConfig %s", crvt, cr.Name)

	return nil
}

// GetInboxAPIService lists all APIServices and returns the first one whose
// Spec.Group equals inboxAPIServiceGroup.
//
// It returns (nil, nil) when no such APIService exists, so callers must check
// the returned pointer as well as the error. A non-nil error is returned only
// when the List call itself fails.
func (r *AlertmanagerReconciler) GetInboxAPIService(ctx context.Context) (*apiregistrationv1.APIService, error) {
	var list apiregistrationv1.APIServiceList
	if err := r.kc.List(ctx, &list); err != nil {
		return nil, err
	}
	// Index into the slice rather than ranging by value: this avoids copying
	// every APIService and avoids returning the address of the loop variable.
	for i := range list.Items {
		if list.Items[i].Spec.Group == inboxAPIServiceGroup {
			return &list.Items[i], nil
		}
	}
	return nil, nil
}

// SetupWithManager registers the reconciler with mgr.
//
// The controller is triggered by Alertmanager objects directly, and by
// APIService objects whose Spec.Group equals inboxAPIServiceGroup. An event on
// such an APIService enqueues a reconcile request for every Alertmanager
// returned by a List call, because the generated AlertmanagerConfig depends on
// that APIService.
func (r *AlertmanagerReconciler) SetupWithManager(mgr ctrl.Manager) error {
	// enqueueAll maps an APIService event to one request per Alertmanager.
	enqueueAll := handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, _ client.Object) []reconcile.Request {
		var amList monitoringv1.AlertmanagerList
		if err := r.kc.List(ctx, &amList); err != nil {
			// A map function cannot return an error; log it so a dropped
			// event is at least visible.
			log.FromContext(ctx).Error(err, "unable to list Alertmanagers")
			return nil
		}

		reqs := make([]reconcile.Request, 0, len(amList.Items))
		for i := range amList.Items {
			reqs = append(reqs, reconcile.Request{NamespacedName: client.ObjectKeyFromObject(&amList.Items[i])})
		}
		return reqs
	})

	// isInboxAPIService filters out every APIService except the inbox one.
	// The checked assertion makes an unexpected object type a non-match
	// instead of a panic.
	isInboxAPIService := predicate.NewPredicateFuncs(func(obj client.Object) bool {
		apisvc, ok := obj.(*apiregistrationv1.APIService)
		return ok && apisvc.Spec.Group == inboxAPIServiceGroup
	})

	return ctrl.NewControllerManagedBy(mgr).
		For(&monitoringv1.Alertmanager{}).
		Watches(&apiregistrationv1.APIService{}, enqueueAll, builder.WithPredicates(isInboxAPIService)).
		Complete(r)
}
4 changes: 2 additions & 2 deletions pkg/controllers/namespace/namespace_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@ type ClientOrgReconciler struct {
bc *prometheus.Client
clusterUID string
hubUID string
d detector.Detector
d detector.PrometheusDetector
}

func NewReconciler(kc client.Client, bc *prometheus.Client, clusterUID, hubUID string, d detector.Detector) *ClientOrgReconciler {
func NewReconciler(kc client.Client, bc *prometheus.Client, clusterUID, hubUID string, d detector.PrometheusDetector) *ClientOrgReconciler {
return &ClientOrgReconciler{
kc: kc,
scheme: kc.Scheme(),
Expand Down
4 changes: 2 additions & 2 deletions pkg/controllers/prometheus/prometheus_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,10 @@ type PrometheusReconciler struct {
clusterUID string
hubUID string
rancherAuthSecretName string
d detector.Detector
d detector.PrometheusDetector
}

func NewReconciler(kc client.Client, bc *Client, clusterUID, hubUID, rancherAuthSecretName string, d detector.Detector) *PrometheusReconciler {
func NewReconciler(kc client.Client, bc *Client, clusterUID, hubUID, rancherAuthSecretName string, d detector.PrometheusDetector) *PrometheusReconciler {
return &PrometheusReconciler{
kc: kc,
scheme: kc.Scheme(),
Expand Down
4 changes: 2 additions & 2 deletions pkg/controllers/servicemonitor/federation_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ type FederationReconciler struct {
cfg *rest.Config
kc client.Client
scheme *runtime.Scheme
d detector.Detector
d detector.PrometheusDetector
}

func NewFederationReconciler(cfg *rest.Config, kc client.Client, d detector.Detector) *FederationReconciler {
func NewFederationReconciler(cfg *rest.Config, kc client.Client, d detector.PrometheusDetector) *FederationReconciler {
return &FederationReconciler{
cfg: cfg,
kc: kc,
Expand Down
Loading

0 comments on commit a1ef967

Please sign in to comment.