From 6df335747da2bcc337b28fb98afb8b07cd8d5269 Mon Sep 17 00:00:00 2001 From: qmhu Date: Thu, 10 Aug 2023 19:55:58 +0800 Subject: [PATCH 1/2] monitor recommendation metrics --- cmd/craned/app/manager.go | 11 +++- cmd/craned/app/options/options.go | 8 +++ .../recommendation/recommendation_checker.go | 62 +++++++++++++++++++ .../recommendation_rule_controller.go | 11 ++++ pkg/metrics/analysis.go | 22 ++++++- 5 files changed, 111 insertions(+), 3 deletions(-) create mode 100644 pkg/controller/recommendation/recommendation_checker.go diff --git a/cmd/craned/app/manager.go b/cmd/craned/app/manager.go index 57dfac9ef..d95c9ffcb 100644 --- a/cmd/craned/app/manager.go +++ b/cmd/craned/app/manager.go @@ -143,7 +143,7 @@ func Run(ctx context.Context, opts *options.Options) error { } }() - initControllers(podOOMRecorder, mgr, opts, predictorMgr, historyDataSources[providers.PrometheusDataSource]) + initControllers(ctx, podOOMRecorder, mgr, opts, predictorMgr, historyDataSources[providers.PrometheusDataSource]) // initialize custom collector metrics initMetricCollector(mgr) runAll(ctx, mgr, predictorMgr, dataSourceProviders[providers.PrometheusDataSource], opts) @@ -266,7 +266,7 @@ func initPredictorManager(opts *options.Options, realtimeDataSources map[provide } // initControllers setup controllers with manager -func initControllers(oomRecorder oom.Recorder, mgr ctrl.Manager, opts *options.Options, predictorMgr predictor.Manager, historyDataSource providers.History) { +func initControllers(ctx context.Context, oomRecorder oom.Recorder, mgr ctrl.Manager, opts *options.Options, predictorMgr predictor.Manager, historyDataSource providers.History) { discoveryClientSet, err := discovery.NewDiscoveryClientForConfig(mgr.GetConfig()) if err != nil { klog.Exit(err, "Unable to create discover client") @@ -417,6 +417,13 @@ func initControllers(oomRecorder oom.Recorder, mgr ctrl.Manager, opts *options.O }).SetupWithManager(mgr); err != nil { klog.Exit(err, "unable to create controller", "controller", "RecommendationTriggerController") } + + checker := recommendationctrl.Checker{ + Client: mgr.GetClient(), + MonitorInterval: opts.MonitorInterval, + OutDateInterval: opts.OutDateInterval, + } + checker.Run(ctx.Done()) } // CnpController diff --git a/cmd/craned/app/options/options.go b/cmd/craned/app/options/options.go index e2094b54a..47299b2f5 100644 --- a/cmd/craned/app/options/options.go +++ b/cmd/craned/app/options/options.go @@ -67,6 +67,12 @@ type Options struct { // CacheUnstructured indicates whether to cache Unstructured objects. When enabled, it will speed up reading Unstructured objects, but will increase memory usage. CacheUnstructured bool + + // MonitorInterval is the interval for recommendation checker + MonitorInterval time.Duration + + // OutDateInterval is the checking interval for identify a recommendation is outdated + OutDateInterval time.Duration } // NewOptions builds an empty options. @@ -139,4 +145,6 @@ func (o *Options) AddFlags(flags *pflag.FlagSet) { flags.IntVar(&o.OOMRecordMaxNumber, "oom-record-max-number", 10000, "Max number for oom records to store in configmap") flags.IntVar(&o.TimeSeriesPredictionMaxConcurrentReconciles, "time-series-prediction-max-concurrent-reconciles", 10, "Max concurrent reconciles for TimeSeriesPrediction controller") flags.BoolVar(&o.CacheUnstructured, "cache-unstructured", true, "whether to cache Unstructured objects. When enabled, it will speed up reading Unstructured objects but will increase memory usage") + flags.DurationVar(&o.MonitorInterval, "recommendation-monitor-interval", time.Hour, "interval for recommendation checker") + flags.DurationVar(&o.OutDateInterval, "recommendation-outdate-interval", 24*time.Hour, "interval for identify a recommendation is outdated") } diff --git a/pkg/controller/recommendation/recommendation_checker.go b/pkg/controller/recommendation/recommendation_checker.go new file mode 100644 index 000000000..804424659 --- /dev/null +++ b/pkg/controller/recommendation/recommendation_checker.go @@ -0,0 +1,62 @@ +package recommendation + +import ( + "context" + "time" + + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/client" + + analysisv1alpha1 "github.com/gocrane/api/analysis/v1alpha1" + + "github.com/gocrane/crane/pkg/metrics" +) + +type Checker struct { + client.Client + MonitorInterval time.Duration + OutDateInterval time.Duration +} + +func (r Checker) Run(stopCh <-chan struct{}) { + go func() { + for { + select { + case <-stopCh: + return + case <-time.Tick(r.MonitorInterval): + r.runChecker() + } + } + }() +} + +func (r Checker) runChecker() { + recommendList := &analysisv1alpha1.RecommendationList{} + err := r.Client.List(context.TODO(), recommendList, []client.ListOption{}...) + if err != nil { + klog.Errorf("Failed to list recommendation: %v", err) + } + + for _, recommend := range recommendList.Items { + updateStatus := "Updated" + if time.Now().Sub(recommend.Status.LastUpdateTime.Time) > r.OutDateInterval { + updateStatus = "OutDate" + } + + resultStatus := "Failed" + if len(recommend.Status.RecommendedInfo) != 0 || len(recommend.Status.RecommendedValue) != 0 { + resultStatus = "Success" + } + + metrics.RecommendationsStatus.With(map[string]string{ + "type": string(recommend.Spec.Type), + "apiversion": recommend.Spec.TargetRef.APIVersion, + "owner_kind": recommend.Spec.TargetRef.Kind, + "namespace": recommend.Spec.TargetRef.Namespace, + "owner_name": recommend.Spec.TargetRef.Name, + "update_status": updateStatus, + "result_status": resultStatus, + }).Set(1) + } +} diff --git a/pkg/controller/recommendation/recommendation_rule_controller.go b/pkg/controller/recommendation/recommendation_rule_controller.go index 2f6d8d082..cdbcfe922 100644 --- a/pkg/controller/recommendation/recommendation_rule_controller.go +++ b/pkg/controller/recommendation/recommendation_rule_controller.go @@ -3,6 +3,7 @@ package recommendation import ( "context" "fmt" + "github.com/gocrane/crane/pkg/metrics" "sort" "strconv" "strings" @@ -309,6 +310,16 @@ func (c *RecommendationRuleController) getIdentities(ctx context.Context, recomm } } + for _, id := range identities { + metrics.SelectTargets.With(map[string]string{ + "type": id.Recommender, + "apiversion": id.APIVersion, + "owner_kind": id.Kind, + "namespace": id.Namespace, + "owner_name": id.Name, + }).Set(1) + } + return identities, nil } diff --git a/pkg/metrics/analysis.go b/pkg/metrics/analysis.go index 10b37cee2..bd930a474 100644 --- a/pkg/metrics/analysis.go +++ b/pkg/metrics/analysis.go @@ -25,8 +25,28 @@ var ( }, []string{"apiversion", "owner_kind", "namespace", "owner_name"}, ) + + SelectTargets = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "crane", + Subsystem: "analysis", + Name: "select_targets", + Help: "The number of selected targets", + }, + []string{"type", "apiversion", "owner_kind", "namespace", "owner_name"}, + ) + + RecommendationsStatus = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "crane", + Subsystem: "analysis", + Name: "recommendations_status", + Help: "The status of recommendations", + }, + []string{"type", "apiversion", "owner_kind", "namespace", "owner_name", "update_status", "result_status"}, + ) ) func init() { - metrics.Registry.MustRegister(ResourceRecommendation, ReplicasRecommendation) + metrics.Registry.MustRegister(ResourceRecommendation, ReplicasRecommendation, SelectTargets, RecommendationsStatus) } From ca2be388c8628ca5e990ed37ecb9521af39d6223 Mon Sep 17 00:00:00 2001 From: qmhu Date: Fri, 11 Aug 2023 16:10:53 +0800 Subject: [PATCH 2/2] fix lint --- pkg/controller/recommendation/recommendation_checker.go | 5 ++++- .../recommendation/recommendation_rule_controller.go | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pkg/controller/recommendation/recommendation_checker.go b/pkg/controller/recommendation/recommendation_checker.go index 804424659..c3ebfa064 100644 --- a/pkg/controller/recommendation/recommendation_checker.go +++ b/pkg/controller/recommendation/recommendation_checker.go @@ -20,11 +20,14 @@ type Checker struct { func (r Checker) Run(stopCh <-chan struct{}) { go func() { + ticker := time.NewTicker(r.MonitorInterval) + defer ticker.Stop() + for { select { case <-stopCh: return - case <-time.Tick(r.MonitorInterval): + case <-ticker.C: r.runChecker() } } diff --git a/pkg/controller/recommendation/recommendation_rule_controller.go b/pkg/controller/recommendation/recommendation_rule_controller.go index cdbcfe922..f3e9a76e7 100644 --- a/pkg/controller/recommendation/recommendation_rule_controller.go +++ b/pkg/controller/recommendation/recommendation_rule_controller.go @@ -3,7 +3,6 @@ package recommendation import ( "context" "fmt" - "github.com/gocrane/crane/pkg/metrics" "sort" "strconv" "strings" @@ -33,6 +32,7 @@ import ( analysisv1alph1 "github.com/gocrane/api/analysis/v1alpha1" "github.com/gocrane/crane/pkg/known" + "github.com/gocrane/crane/pkg/metrics" "github.com/gocrane/crane/pkg/oom" predictormgr "github.com/gocrane/crane/pkg/predictor" "github.com/gocrane/crane/pkg/providers"