Skip to content

Commit

Permalink
Merge pull request #850 from qmhu/recommend-metrics
Browse files Browse the repository at this point in the history
Monitor recommendation metrics
  • Loading branch information
qmhu authored Aug 11, 2023
2 parents c3adee2 + ca2be38 commit 96c7e4f
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 3 deletions.
11 changes: 9 additions & 2 deletions cmd/craned/app/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ func Run(ctx context.Context, opts *options.Options) error {
}
}()

initControllers(podOOMRecorder, mgr, opts, predictorMgr, historyDataSources[providers.PrometheusDataSource])
initControllers(ctx, podOOMRecorder, mgr, opts, predictorMgr, historyDataSources[providers.PrometheusDataSource])
// initialize custom collector metrics
initMetricCollector(mgr)
runAll(ctx, mgr, predictorMgr, dataSourceProviders[providers.PrometheusDataSource], opts)
Expand Down Expand Up @@ -266,7 +266,7 @@ func initPredictorManager(opts *options.Options, realtimeDataSources map[provide
}

// initControllers setup controllers with manager
func initControllers(oomRecorder oom.Recorder, mgr ctrl.Manager, opts *options.Options, predictorMgr predictor.Manager, historyDataSource providers.History) {
func initControllers(ctx context.Context, oomRecorder oom.Recorder, mgr ctrl.Manager, opts *options.Options, predictorMgr predictor.Manager, historyDataSource providers.History) {
discoveryClientSet, err := discovery.NewDiscoveryClientForConfig(mgr.GetConfig())
if err != nil {
klog.Exit(err, "Unable to create discover client")
Expand Down Expand Up @@ -417,6 +417,13 @@ func initControllers(oomRecorder oom.Recorder, mgr ctrl.Manager, opts *options.O
}).SetupWithManager(mgr); err != nil {
klog.Exit(err, "unable to create controller", "controller", "RecommendationTriggerController")
}

checker := recommendationctrl.Checker{
Client: mgr.GetClient(),
MonitorInterval: opts.MonitorInterval,
OutDateInterval: opts.OutDateInterval,
}
checker.Run(ctx.Done())
}

// CnpController
Expand Down
8 changes: 8 additions & 0 deletions cmd/craned/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ type Options struct {

// CacheUnstructured indicates whether to cache Unstructured objects. When enabled, it will speed up reading Unstructured objects, but will increase memory usage.
CacheUnstructured bool

// MonitorInterval is the interval at which the recommendation checker runs
MonitorInterval time.Duration

// OutDateInterval is the time since its last update after which a recommendation is considered outdated
OutDateInterval time.Duration
}

// NewOptions builds an empty options.
Expand Down Expand Up @@ -139,4 +145,6 @@ func (o *Options) AddFlags(flags *pflag.FlagSet) {
flags.IntVar(&o.OOMRecordMaxNumber, "oom-record-max-number", 10000, "Max number for oom records to store in configmap")
flags.IntVar(&o.TimeSeriesPredictionMaxConcurrentReconciles, "time-series-prediction-max-concurrent-reconciles", 10, "Max concurrent reconciles for TimeSeriesPrediction controller")
flags.BoolVar(&o.CacheUnstructured, "cache-unstructured", true, "whether to cache Unstructured objects. When enabled, it will speed up reading Unstructured objects but will increase memory usage")
flags.DurationVar(&o.MonitorInterval, "recommendation-monitor-interval", time.Hour, "interval for recommendation checker")
flags.DurationVar(&o.OutDateInterval, "recommendation-outdate-interval", 24*time.Hour, "interval for identify a recommendation is outdated")
}
65 changes: 65 additions & 0 deletions pkg/controller/recommendation/recommendation_checker.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package recommendation

import (
"context"
"time"

"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/client"

analysisv1alpha1 "github.com/gocrane/api/analysis/v1alpha1"

"github.com/gocrane/crane/pkg/metrics"
)

// Checker periodically lists Recommendation objects and reports their update
// and result status through the metrics.RecommendationsStatus gauge.
type Checker struct {
// Client is the embedded controller-runtime client used to list recommendations.
client.Client
// MonitorInterval is the period of the ticker that triggers each check.
MonitorInterval time.Duration
// OutDateInterval is the time since Status.LastUpdateTime after which a
// recommendation is reported with update_status "OutDate".
OutDateInterval time.Duration
}

// Run starts the recommendation checker in a background goroutine and returns
// immediately. The checker runs once every MonitorInterval until stopCh is
// closed; the first check happens one full interval after Run is called.
//
// If MonitorInterval is not positive the checker is not started and a warning
// is logged instead.
func (r Checker) Run(stopCh <-chan struct{}) {
	// time.NewTicker panics on a non-positive duration. The interval comes from
	// a command-line flag, so guard here rather than crash the whole process
	// from inside the goroutine.
	if r.MonitorInterval <= 0 {
		klog.Warningf("Recommendation checker is disabled: monitor interval %v is not positive", r.MonitorInterval)
		return
	}

	go func() {
		ticker := time.NewTicker(r.MonitorInterval)
		defer ticker.Stop()

		for {
			select {
			case <-stopCh:
				return
			case <-ticker.C:
				r.runChecker()
			}
		}
	}()
}

// runChecker lists all Recommendation objects and publishes one
// RecommendationsStatus gauge sample per recommendation.
//
// Each sample carries two derived labels:
//   - update_status: "OutDate" when Status.LastUpdateTime is older than
//     OutDateInterval, otherwise "Updated".
//   - result_status: "Success" when the recommendation carries a
//     RecommendedInfo or RecommendedValue, otherwise "Failed".
//
// If listing fails the error is logged and the previously published samples
// are left untouched.
func (r Checker) runChecker() {
	recommendList := &analysisv1alpha1.RecommendationList{}
	if err := r.Client.List(context.TODO(), recommendList); err != nil {
		klog.Errorf("Failed to list recommendation: %v", err)
		return
	}

	// The status values are part of the label set, so a recommendation whose
	// status changed (or that was deleted) would otherwise leave its old
	// series stuck at 1 forever. Drop all series and rebuild from the fresh
	// list; this is only done after a successful list so a transient API
	// error does not wipe the metric.
	metrics.RecommendationsStatus.Reset()

	// Index into the slice to avoid copying each Recommendation by value.
	for i := range recommendList.Items {
		recommend := &recommendList.Items[i]

		updateStatus := "Updated"
		if time.Since(recommend.Status.LastUpdateTime.Time) > r.OutDateInterval {
			updateStatus = "OutDate"
		}

		resultStatus := "Failed"
		if len(recommend.Status.RecommendedInfo) != 0 || len(recommend.Status.RecommendedValue) != 0 {
			resultStatus = "Success"
		}

		metrics.RecommendationsStatus.With(map[string]string{
			"type":          string(recommend.Spec.Type),
			"apiversion":    recommend.Spec.TargetRef.APIVersion,
			"owner_kind":    recommend.Spec.TargetRef.Kind,
			"namespace":     recommend.Spec.TargetRef.Namespace,
			"owner_name":    recommend.Spec.TargetRef.Name,
			"update_status": updateStatus,
			"result_status": resultStatus,
		}).Set(1)
	}
}
11 changes: 11 additions & 0 deletions pkg/controller/recommendation/recommendation_rule_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
analysisv1alph1 "github.com/gocrane/api/analysis/v1alpha1"

"github.com/gocrane/crane/pkg/known"
"github.com/gocrane/crane/pkg/metrics"
"github.com/gocrane/crane/pkg/oom"
predictormgr "github.com/gocrane/crane/pkg/predictor"
"github.com/gocrane/crane/pkg/providers"
Expand Down Expand Up @@ -309,6 +310,16 @@ func (c *RecommendationRuleController) getIdentities(ctx context.Context, recomm
}
}

for _, id := range identities {
metrics.SelectTargets.With(map[string]string{
"type": id.Recommender,
"apiversion": id.APIVersion,
"owner_kind": id.Kind,
"namespace": id.Namespace,
"owner_name": id.Name,
}).Set(1)
}

return identities, nil
}

Expand Down
22 changes: 21 additions & 1 deletion pkg/metrics/analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,28 @@ var (
},
[]string{"apiversion", "owner_kind", "namespace", "owner_name"},
)

SelectTargets = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "crane",
Subsystem: "analysis",
Name: "select_targets",
Help: "The number of selected targets",
},
[]string{"type", "apiversion", "owner_kind", "namespace", "owner_name"},
)

RecommendationsStatus = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "crane",
Subsystem: "analysis",
Name: "recommendations_status",
Help: "The status of recommendations",
},
[]string{"type", "apiversion", "owner_kind", "namespace", "owner_name", "update_status", "result_status"},
)
)

func init() {
metrics.Registry.MustRegister(ResourceRecommendation, ReplicasRecommendation)
metrics.Registry.MustRegister(ResourceRecommendation, ReplicasRecommendation, SelectTargets, RecommendationsStatus)
}

0 comments on commit 96c7e4f

Please sign in to comment.