Skip to content

Commit

Permalink
added kuberhealthy agent part
Browse files Browse the repository at this point in the history
  • Loading branch information
vijeyash1 committed Feb 9, 2024
1 parent 1caeaf0 commit 224017a
Show file tree
Hide file tree
Showing 5 changed files with 159 additions and 2 deletions.
16 changes: 16 additions & 0 deletions agent/config/config.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package config

import (
"time"

"github.com/kelseyhightower/envconfig"
"github.com/pkg/errors"
)
Expand All @@ -15,6 +17,7 @@ type AgentConfigurations struct {
KubePreUpgradeInterval string `envconfig:"KUBEPREUPGRADE_INTERVAL" default:"*/60 * * * *"`
TrivyInterval string `envconfig:"TRIVY_INTERVAL" default:"*/10 * * * *"`
SchedulerEnable bool `envconfig:"SCHEDULER_ENABLE" default:"true"`
KuberHealthyEnable bool `envconfig:"KUBERHEALTHY_ENABLE" default:"true"`
}

func GetAgentConfigurations() (serviceConf *AgentConfigurations, err error) {
Expand All @@ -24,3 +27,16 @@ func GetAgentConfigurations() (serviceConf *AgentConfigurations, err error) {
}
return
}

type KHConfig struct {
KuberhealthyURL string `envconfig:"KUBERHEALTHY_URL" required:"true"`
PollInterval time.Duration `envconfig:"POLL_INTERVAL" default:"15m"`
}

func GetKuberHealthyConfig() (khconfig *KHConfig, err error) {
khconfig = &KHConfig{}
if err = envconfig.Process("", khconfig); err != nil {
return nil, errors.WithStack(err)
}
return
}
7 changes: 5 additions & 2 deletions agent/kubviz/k8smetrics_agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/intelops/kubviz/agent/kubviz/plugins/ketall"
"github.com/intelops/kubviz/agent/kubviz/plugins/kubepreupgrade"

"github.com/intelops/kubviz/agent/kubviz/plugins/kuberhealthy"
"github.com/intelops/kubviz/agent/kubviz/plugins/kubescore"
"github.com/intelops/kubviz/agent/kubviz/plugins/outdated"
"github.com/intelops/kubviz/agent/kubviz/plugins/rakkess"
Expand Down Expand Up @@ -52,8 +53,7 @@ const (
var (
ClusterName string = os.Getenv("CLUSTER_NAME")
token string = os.Getenv("NATS_TOKEN")

natsurl string = os.Getenv("NATS_ADDRESS")
natsurl string = os.Getenv("NATS_ADDRESS")

//for local testing provide the location of kubeconfig
cluster_conf_loc string = os.Getenv("CONFIG_LOCATION")
Expand Down Expand Up @@ -128,6 +128,9 @@ func main() {
}()

go events.PublishMetrics(clientset, js, clusterMetricsChan)
if cfg.KuberHealthyEnable {
go kuberhealthy.StartKuberHealthy(js)
}
go server.StartServer()
collectAndPublishMetrics := func() {
err := outdated.OutDatedImages(config, js)
Expand Down
98 changes: 98 additions & 0 deletions agent/kubviz/plugins/kuberhealthy/kuberhealthy.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package kuberhealthy

import (
"context"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"strings"
"time"

"github.com/intelops/kubviz/agent/config"
"github.com/intelops/kubviz/constants"
"github.com/intelops/kubviz/model"
"github.com/intelops/kubviz/pkg/opentelemetry"
"github.com/nats-io/nats.go"
"go.opentelemetry.io/otel"
)

func StartKuberHealthy(js nats.JetStreamContext) {
khConfig, err := config.GetKuberHealthyConfig()
if err != nil {
log.Fatalf("Error getting Kuberhealthy config: %v", err)
}

ticker := time.NewTicker(khConfig.PollInterval)
defer ticker.Stop()

for range ticker.C {
if err := pollAndPublishKuberhealthy(khConfig.KuberhealthyURL, js); err != nil {
log.Printf("Error polling and publishing Kuberhealthy metrics: %v", err)
}
}
}
func pollAndPublishKuberhealthy(url string, js nats.JetStreamContext) error {
resp, err := http.Get(url)
if err != nil {
return fmt.Errorf("error making GET request to Kuberhealthy: %w", err)
}
defer resp.Body.Close()

body, err := io.ReadAll(resp.Body)
if err != nil {
return fmt.Errorf("error reading response body: %w", err)
}

var state model.State
if err := json.Unmarshal(body, &state); err != nil {
return fmt.Errorf("error unmarshaling response: %w", err)
}

return PublishKuberhealthyMetrics(js, state)
}
func boolToUInt8(b bool) uint8 {
if b {
return 1
}
return 0
}

func errorsToString(errors []string) string {
return strings.Join(errors, ", ")
}
func PublishKuberhealthyMetrics(js nats.JetStreamContext, state model.State) error {
ctx := context.Background()
tracer := otel.Tracer("kuberhealthy")
_, span := tracer.Start(opentelemetry.BuildContext(ctx), "PublishKuberhealthyMetrics")
defer span.End()

for checkName, details := range state.CheckDetails {
metrics := model.KuberhealthyCheckDetail{
CurrentUUID: details.CurrentUUID,
CheckName: checkName,
OK: boolToUInt8(details.OK),
Errors: errorsToString(details.Errors),
RunDuration: details.RunDuration,
Namespace: details.Namespace,
Node: details.Node,
LastRun: details.LastRun.Time,
AuthoritativePod: details.AuthoritativePod,
}

metricsJSON, err := json.Marshal(metrics)
if err != nil {
log.Printf("Error marshaling metrics of kuberhealthy %s: %v", checkName, err)
continue
}

if _, err := js.Publish(constants.KUBERHEALTHY_SUBJECT, metricsJSON); err != nil {
log.Printf("Error publishing metrics for kuberhealthy %s: %v", checkName, err)
continue
}
}

log.Printf("Kuberhealthy metrics have been published")
return nil
}
1 change: 1 addition & 0 deletions constants/constants.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package constants

const (
KUBERHEALTHY_SUBJECT = "METRICS.kuberhealthy"
KUBESCORE_SUBJECT = "METRICS.kubescore"
TRIVY_K8S_SUBJECT = "METRICS.trivyk8s"
StreamSubjects = "METRICS.*"
Expand Down
39 changes: 39 additions & 0 deletions model/kuberhealthy.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package model

import (
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

type State struct {
OK bool `json:"OK"`
Errors []string `json:"Errors"`
CheckDetails map[string]WorkloadDetails `json:"CheckDetails"` // map of check names to last run timestamp
JobDetails map[string]WorkloadDetails `json:"JobDetails"` // map of job names to last run timestamp
CurrentMaster string `json:"CurrentMaster"`
Metadata map[string]string `json:"Metadata"`
}

type WorkloadDetails struct {
OK bool `json:"OK" yaml:"OK"` // true or false status of the khWorkload, whether or not it completed successfully
Errors []string `json:"Errors" yaml:"Errors"` // the list of errors reported from the khWorkload run
RunDuration string `json:"RunDuration" yaml:"RunDuration"` // the time it took for the khWorkload to complete
Namespace string `json:"Namespace" yaml:"Namespace"` // the namespace the khWorkload was run in
Node string `json:"Node" yaml:"Node"` // the node the khWorkload ran on
LastRun *metav1.Time `json:"LastRun,omitempty" yaml:"LastRun,omitempty"` // the time the khWorkload was last run
AuthoritativePod string `json:"AuthoritativePod" yaml:"AuthoritativePod"` // the main kuberhealthy pod creating and updating the khstate
CurrentUUID string `json:"uuid" yaml:"uuid"` // the UUID that is authorized to report statuses into the kuberhealthy endpoint
}

type KuberhealthyCheckDetail struct {
CurrentUUID string `json:"currentUUID"`
CheckName string `json:"checkName"`
OK uint8 `json:"ok"`
Errors string `json:"errors"`
RunDuration string `json:"runDuration"`
Namespace string `json:"namespace"`
Node string `json:"node"`
LastRun time.Time `json:"lastRun"`
AuthoritativePod string `json:"authoritativePod"`
}

0 comments on commit 224017a

Please sign in to comment.