Skip to content

Commit

Permalink
Merge pull request #864 from qmhu/extension-labels
Browse files Browse the repository at this point in the history
Extension label for prom query
  • Loading branch information
qmhu authored Sep 26, 2023
2 parents 6c6b7c9 + 4c29e75 commit 870ed4a
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 30 deletions.
3 changes: 3 additions & 0 deletions cmd/craned/app/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ import (
"github.com/gocrane/crane/pkg/recommendation"
"github.com/gocrane/crane/pkg/server"
serverconfig "github.com/gocrane/crane/pkg/server/config"
"github.com/gocrane/crane/pkg/utils"
"github.com/gocrane/crane/pkg/utils/target"
"github.com/gocrane/crane/pkg/webhooks"
)
Expand Down Expand Up @@ -256,6 +257,8 @@ func initDataSources(mgr ctrl.Manager, opts *options.Options) (map[providers.Dat
hybridDataSources[providers.PrometheusDataSource] = provider
realtimeDataSources[providers.PrometheusDataSource] = provider
historyDataSources[providers.PrometheusDataSource] = provider

utils.SetExtensionLabels(opts.DataSourcePromConfig.ExtensionLabels)
}
}
return realtimeDataSources, historyDataSources, hybridDataSources
Expand Down
1 change: 1 addition & 0 deletions cmd/craned/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ func (o *Options) AddFlags(flags *pflag.FlagSet) {
flags.StringVar(&o.DataSourcePromConfig.AdapterConfigMapKey, "prometheus-adapter-configmap-key", "", "prometheus adapter-configmap key")
flags.StringVar(&o.DataSourcePromConfig.AdapterConfig, "prometheus-adapter-config", "", "prometheus adapter-config path")
flags.StringVar(&o.DataSourcePromConfig.AdapterExtensionLabels, "prometheus-adapter-extension-labels", "", "prometheus adapter extension-labels for expressionQuery")
flags.StringVar(&o.DataSourcePromConfig.ExtensionLabels, "extension-labels", "", "extension-labels for every prometheus query")
flags.StringVar(&o.DataSourcePromConfig.Auth.Username, "prometheus-auth-username", "", "prometheus auth username")
flags.StringVar(&o.DataSourcePromConfig.Auth.Password, "prometheus-auth-password", "", "prometheus auth password")
flags.StringVar(&o.DataSourcePromConfig.Auth.BearerToken, "prometheus-auth-bearertoken", "", "prometheus auth bearertoken")
Expand Down
1 change: 1 addition & 0 deletions pkg/providers/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ type PromConfig struct {
AdapterConfigMapKey string
AdapterConfig string
AdapterExtensionLabels string
ExtensionLabels string
Timeout time.Duration
KeepAlive time.Duration
InsecureSkipVerify bool
Expand Down
92 changes: 62 additions & 30 deletions pkg/utils/expression_prom_default.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,46 +2,48 @@ package utils

import (
"fmt"
"strings"
)

// todo: later we change these templates to configurable like prometheus-adapter
const (
ExtensionLabelsHolder = `EXTENSION_LABELS_HOLDER`
// WorkloadCpuUsageExprTemplate is used to query workload cpu usage by promql, param is namespace,workload-name,duration str
WorkloadCpuUsageExprTemplate = `sum(irate(container_cpu_usage_seconds_total{namespace="%s",pod=~"%s",container!=""}[%s]))`
WorkloadCpuUsageExprTemplate = `sum(irate(container_cpu_usage_seconds_total{namespace="%s",pod=~"%s",container!=""EXTENSION_LABELS_HOLDER}[%s]))`
// WorkloadMemUsageExprTemplate is used to query workload mem usage by promql, param is namespace, workload-name
WorkloadMemUsageExprTemplate = `sum(container_memory_working_set_bytes{namespace="%s",pod=~"%s",container!=""})`
WorkloadMemUsageExprTemplate = `sum(container_memory_working_set_bytes{namespace="%s",pod=~"%s",container!=""EXTENSION_LABELS_HOLDER})`

// following is node exporter metric for node cpu/memory usage
// NodeCpuUsageExprTemplate is used to query node cpu usage by promql, param is node name which prometheus scrape, duration str
NodeCpuUsageExprTemplate = `sum(count(node_cpu_seconds_total{mode="idle",instance=~"(%s)(:\\d+)?"}) by (mode, cpu)) - sum(irate(node_cpu_seconds_total{mode="idle",instance=~"(%s)(:\\d+)?"}[%s]))`
NodeCpuUsageExprTemplate = `sum(count(node_cpu_seconds_total{mode="idle",instance=~"(%s)(:\\d+)?"EXTENSION_LABELS_HOLDER}) by (mode, cpu)) - sum(irate(node_cpu_seconds_total{mode="idle",instance=~"(%s)(:\\d+)?"EXTENSION_LABELS_HOLDER}[%s]))`
// NodeMemUsageExprTemplate is used to query node memory usage by promql, param is node name, node name which prometheus scrape
NodeMemUsageExprTemplate = `sum(node_memory_MemTotal_bytes{instance=~"(%s)(:\\d+)?"} - node_memory_MemAvailable_bytes{instance=~"(%s)(:\\d+)?"})`
// The extension-labels placeholder goes after the closing quote of each instance matcher,
// so the injected labels become separate matchers instead of part of the instance regex.
NodeMemUsageExprTemplate = `sum(node_memory_MemTotal_bytes{instance=~"(%s)(:\\d+)?"EXTENSION_LABELS_HOLDER} - node_memory_MemAvailable_bytes{instance=~"(%s)(:\\d+)?"EXTENSION_LABELS_HOLDER})`

// NodeCpuRequestUtilizationExprTemplate is used to query node cpu request utilization by promql, param is node name, node name which prometheus scrape
NodeCpuRequestUtilizationExprTemplate = `sum(kube_pod_container_resource_requests{node="%s", resource="cpu", unit="core"} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) / sum(kube_node_status_capacity{node="%s", resource="cpu", unit="core"} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) `
NodeCpuRequestUtilizationExprTemplate = `sum(kube_pod_container_resource_requests{node="%s", resource="cpu", unit="core"EXTENSION_LABELS_HOLDER} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"EXTENSION_LABELS_HOLDER}) by (node)) by (node) / sum(kube_node_status_capacity{node="%s", resource="cpu", unit="core"EXTENSION_LABELS_HOLDER} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"EXTENSION_LABELS_HOLDER}) by (node)) by (node) `
// NodeMemRequestUtilizationExprTemplate is used to query node memory request utilization by promql, param is node name, node name which prometheus scrape
NodeMemRequestUtilizationExprTemplate = `sum(kube_pod_container_resource_requests{node="%s", resource="memory", unit="byte", namespace!=""} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) / sum(kube_node_status_capacity{node="%s", resource="memory", unit="byte"} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) `
NodeMemRequestUtilizationExprTemplate = `sum(kube_pod_container_resource_requests{node="%s", resource="memory", unit="byte", namespace!=""EXTENSION_LABELS_HOLDER} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"EXTENSION_LABELS_HOLDER}) by (node)) by (node) / sum(kube_node_status_capacity{node="%s", resource="memory", unit="byte"EXTENSION_LABELS_HOLDER} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"EXTENSION_LABELS_HOLDER}) by (node)) by (node) `
// NodeCpuUsageUtilizationExprTemplate is used to query node cpu usage utilization by promql, param is node name, node name which prometheus scrape
NodeCpuUsageUtilizationExprTemplate = `sum(label_replace(irate(container_cpu_usage_seconds_total{instance="%s", container!="POD", container!="",image!=""}[1h]), "node", "$1", "instance", "(^[^:]+)") * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) / sum(kube_node_status_capacity{node="%s", resource="cpu", unit="core"} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) `
NodeCpuUsageUtilizationExprTemplate = `sum(label_replace(irate(container_cpu_usage_seconds_total{instance="%s", container!="POD", container!="",image!=""EXTENSION_LABELS_HOLDER}[1h]), "node", "$1", "instance", "(^[^:]+)") * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"EXTENSION_LABELS_HOLDER}) by (node)) by (node) / sum(kube_node_status_capacity{node="%s", resource="cpu", unit="core"EXTENSION_LABELS_HOLDER} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"EXTENSION_LABELS_HOLDER}) by (node)) by (node) `
// NodeMemUsageUtilizationExprTemplate is used to query node memory usage utilization by promql, param is node name, node name which prometheus scrape
NodeMemUsageUtilizationExprTemplate = `sum(label_replace(container_memory_usage_bytes{instance="%s", namespace!="",container!="POD", container!="",image!=""}, "node", "$1", "instance", "(^[^:]+)") * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) / sum(kube_node_status_capacity{node="%s", resource="memory", unit="byte"} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) `
NodeMemUsageUtilizationExprTemplate = `sum(label_replace(container_memory_usage_bytes{instance="%s", namespace!="",container!="POD", container!="",image!=""EXTENSION_LABELS_HOLDER}, "node", "$1", "instance", "(^[^:]+)") * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"EXTENSION_LABELS_HOLDER}) by (node)) by (node) / sum(kube_node_status_capacity{node="%s", resource="memory", unit="byte"EXTENSION_LABELS_HOLDER} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"EXTENSION_LABELS_HOLDER}) by (node)) by (node) `

// PodCpuUsageExprTemplate is used to query pod cpu usage by promql, param is namespace,pod, duration str
PodCpuUsageExprTemplate = `sum(irate(container_cpu_usage_seconds_total{container!="POD",namespace="%s",pod="%s"}[%s]))`
PodCpuUsageExprTemplate = `sum(irate(container_cpu_usage_seconds_total{container!="POD",namespace="%s",pod="%s"EXTENSION_LABELS_HOLDER}[%s]))`
// PodMemUsageExprTemplate is used to query pod memory usage by promql, param is namespace,pod
PodMemUsageExprTemplate = `sum(container_memory_working_set_bytes{container!="POD",namespace="%s",pod="%s"})`
PodMemUsageExprTemplate = `sum(container_memory_working_set_bytes{container!="POD",namespace="%s",pod="%s"EXTENSION_LABELS_HOLDER})`

// ContainerCpuUsageExprTemplate is used to query container cpu usage by promql, param is namespace,pod,container duration str
ContainerCpuUsageExprTemplate = `irate(container_cpu_usage_seconds_total{container!="POD",namespace="%s",pod=~"%s",container="%s"}[%s])`
ContainerCpuUsageExprTemplate = `irate(container_cpu_usage_seconds_total{container!="POD",namespace="%s",pod=~"%s",container="%s"EXTENSION_LABELS_HOLDER}[%s])`
// ContainerMemUsageExprTemplate is used to query container memory usage by promql, param is namespace,pod,container
ContainerMemUsageExprTemplate = `container_memory_working_set_bytes{container!="POD",namespace="%s",pod=~"%s",container="%s"}`
ContainerMemUsageExprTemplate = `container_memory_working_set_bytes{container!="POD",namespace="%s",pod=~"%s",container="%s"EXTENSION_LABELS_HOLDER}`

CustomerExprTemplate = `sum(%s{%s})`
CustomerExprTemplate = `sum(%s{%sEXTENSION_LABELS_HOLDER})`

// Container network cumulative count of bytes received
queryFmtNetReceiveBytes = `sum(rate(container_network_receive_bytes_total{namespace="%s",pod=~"%s",container!=""}[3m]))`
queryFmtNetReceiveBytes = `sum(rate(container_network_receive_bytes_total{namespace="%s",pod=~"%s",container!=""EXTENSION_LABELS_HOLDER}[3m]))`
// Container network cumulative count of bytes transmitted
queryFmtNetTransferBytes = `sum(rate(container_network_transmit_bytes_total{namespace="%s",pod=~"%s",container!=""}[3m]))`
queryFmtNetTransferBytes = `sum(rate(container_network_transmit_bytes_total{namespace="%s",pod=~"%s",container!=""EXTENSION_LABELS_HOLDER}[3m]))`
)

const (
Expand All @@ -51,6 +53,31 @@ const (
PostRegMatchesPodStatefulset = `[0-9]+$`
)

// ExtensionLabelArray holds the raw comma-separated entries most recently passed to
// SetExtensionLabels, in the order given (including entries that were not valid key=value pairs).
var ExtensionLabelArray []string

// extensionLabelsString is the rendered label-matcher fragment that replaces the
// extension-labels placeholder in every query template. It is either empty or starts
// with a comma so it can be appended directly after a template's existing matchers.
var extensionLabelsString string

// SetExtensionLabels parses a comma-separated list of key=value pairs (for example
// "cluster=c1,region=r1") and stores the equivalent matcher fragment (`,cluster="c1",region="r1"`).
//
// An empty argument is a no-op and leaves any previously configured labels in place.
// A non-empty argument replaces the previous configuration instead of appending to it,
// so calling the function more than once never duplicates matchers.
//
// Entries without '=' or with an empty key are skipped. Only the first '=' separates key
// from value, so values may themselves contain '='. Surrounding whitespace is trimmed from
// keys and values, and values are quoted with Go escaping rules so embedded quotes or
// backslashes cannot break out of the string literal.
func SetExtensionLabels(extensionLabels string) {
	if extensionLabels == "" {
		return
	}

	entries := strings.Split(extensionLabels, ",")
	matchers := make([]string, 0, len(entries))
	for _, entry := range entries {
		kv := strings.SplitN(entry, "=", 2)
		if len(kv) != 2 {
			// skip the invalid kv
			continue
		}

		key, value := strings.TrimSpace(kv[0]), strings.TrimSpace(kv[1])
		if key == "" {
			// a matcher without a label name is not a usable matcher
			continue
		}

		matchers = append(matchers, fmt.Sprintf("%s=%q", key, value))
	}

	ExtensionLabelArray = entries

	// Join only the valid matchers so skipped entries never leave a dangling separator.
	extensionLabelsString = ""
	if len(matchers) > 0 {
		extensionLabelsString = "," + strings.Join(matchers, ",")
	}
}

func GetPodNameReg(resourceName string, resourceType string) string {
switch resourceType {
case "DaemonSet":
Expand All @@ -66,61 +93,66 @@ func GetPodNameReg(resourceName string, resourceType string) string {
}

func GetCustomerExpression(metricName string, labels string) string {
return fmt.Sprintf(CustomerExprTemplate, metricName, labels)
return fmtSprintfInternal(CustomerExprTemplate, metricName, labels)
}

func GetWorkloadCpuUsageExpression(namespace string, name string, kind string) string {
return fmt.Sprintf(WorkloadCpuUsageExprTemplate, namespace, GetPodNameReg(name, kind), "3m")
return fmtSprintfInternal(WorkloadCpuUsageExprTemplate, namespace, GetPodNameReg(name, kind), "3m")
}

func GetWorkloadMemUsageExpression(namespace string, name string, kind string) string {
return fmt.Sprintf(WorkloadMemUsageExprTemplate, namespace, GetPodNameReg(name, kind))
return fmtSprintfInternal(WorkloadMemUsageExprTemplate, namespace, GetPodNameReg(name, kind))
}

func GetContainerCpuUsageExpression(namespace string, workloadName string, kind string, containerName string) string {
return fmt.Sprintf(ContainerCpuUsageExprTemplate, namespace, GetPodNameReg(workloadName, kind), containerName, "3m")
return fmtSprintfInternal(ContainerCpuUsageExprTemplate, namespace, GetPodNameReg(workloadName, kind), containerName, "3m")
}

func GetContainerMemUsageExpression(namespace string, workloadName string, kind string, containerName string) string {
return fmt.Sprintf(ContainerMemUsageExprTemplate, namespace, GetPodNameReg(workloadName, kind), containerName)
return fmtSprintfInternal(ContainerMemUsageExprTemplate, namespace, GetPodNameReg(workloadName, kind), containerName)
}

func GetPodCpuUsageExpression(namespace string, name string) string {
return fmt.Sprintf(PodCpuUsageExprTemplate, namespace, name, "3m")
return fmtSprintfInternal(PodCpuUsageExprTemplate, namespace, name, "3m")
}

func GetPodMemUsageExpression(namespace string, name string) string {
return fmt.Sprintf(PodMemUsageExprTemplate, namespace, name)
return fmtSprintfInternal(PodMemUsageExprTemplate, namespace, name)
}

func GetNodeCpuUsageExpression(nodeName string) string {
return fmt.Sprintf(NodeCpuUsageExprTemplate, nodeName, nodeName, "3m")
return fmtSprintfInternal(NodeCpuUsageExprTemplate, nodeName, nodeName, "3m")
}

func GetNodeMemUsageExpression(nodeName string) string {
return fmt.Sprintf(NodeMemUsageExprTemplate, nodeName, nodeName)
return fmtSprintfInternal(NodeMemUsageExprTemplate, nodeName, nodeName)
}

func GetNodeCpuRequestUtilizationExpression(nodeName string) string {
return fmt.Sprintf(NodeCpuRequestUtilizationExprTemplate, nodeName, nodeName)
return fmtSprintfInternal(NodeCpuRequestUtilizationExprTemplate, nodeName, nodeName)
}

func GetNodeMemRequestUtilizationExpression(nodeName string) string {
return fmt.Sprintf(NodeMemRequestUtilizationExprTemplate, nodeName, nodeName)
return fmtSprintfInternal(NodeMemRequestUtilizationExprTemplate, nodeName, nodeName)
}

func GetNodeCpuUsageUtilizationExpression(nodeName string) string {
return fmt.Sprintf(NodeCpuUsageUtilizationExprTemplate, nodeName, nodeName)
return fmtSprintfInternal(NodeCpuUsageUtilizationExprTemplate, nodeName, nodeName)
}

func GetNodeMemUsageUtilizationExpression(nodeName string) string {
return fmt.Sprintf(NodeMemUsageUtilizationExprTemplate, nodeName, nodeName)
return fmtSprintfInternal(NodeMemUsageUtilizationExprTemplate, nodeName, nodeName)
}

func GetWorkloadNetReceiveBytesExpression(namespace string, name string, kind string) string {
return fmt.Sprintf(queryFmtNetReceiveBytes, namespace, GetPodNameReg(name, kind))
return fmtSprintfInternal(queryFmtNetReceiveBytes, namespace, GetPodNameReg(name, kind))
}

func GetWorkloadNetTransferBytesExpression(namespace string, name string, kind string) string {
return fmt.Sprintf(queryFmtNetTransferBytes, namespace, GetPodNameReg(name, kind))
return fmtSprintfInternal(queryFmtNetTransferBytes, namespace, GetPodNameReg(name, kind))
}

func fmtSprintfInternal(format string, a ...interface{}) string {
formatReplaced := strings.ReplaceAll(format, ExtensionLabelsHolder, extensionLabelsString)
return fmt.Sprintf(formatReplaced, a...)
}

0 comments on commit 870ed4a

Please sign in to comment.