From eada10562a15a85446a6fa3e79266cb4194a0820 Mon Sep 17 00:00:00 2001 From: Joe Talerico aka rook Date: Fri, 15 Nov 2024 12:28:50 -0500 Subject: [PATCH] Adding vswitch data Signed-off-by: Joe Talerico aka rook --- cmd/k8s-netperf/k8s-netperf.go | 4 + pkg/archive/archive.go | 158 ++++++++++++++++++++------------- pkg/metrics/system.go | 4 +- 3 files changed, 101 insertions(+), 65 deletions(-) diff --git a/cmd/k8s-netperf/k8s-netperf.go b/cmd/k8s-netperf/k8s-netperf.go index 379926f..84789a3 100644 --- a/cmd/k8s-netperf/k8s-netperf.go +++ b/cmd/k8s-netperf/k8s-netperf.go @@ -257,6 +257,10 @@ var rootCmd = &cobra.Command{ if len(npr.ClientNodeInfo.NodeName) > 0 && len(npr.ServerNodeInfo.NodeName) > 0 { sr.Results[i].ClientMetrics, _ = metrics.QueryNodeCPU(npr.ClientNodeInfo, pcon, npr.StartTime, npr.EndTime) sr.Results[i].ServerMetrics, _ = metrics.QueryNodeCPU(npr.ServerNodeInfo, pcon, npr.StartTime, npr.EndTime) + metrics.VSwitchCPU(npr.ClientNodeInfo, pcon, npr.StartTime, npr.EndTime, &sr.Results[i].ClientMetrics) + metrics.VSwitchMem(npr.ClientNodeInfo, pcon, npr.StartTime, npr.EndTime, &sr.Results[i].ClientMetrics) + metrics.VSwitchCPU(npr.ServerNodeInfo, pcon, npr.StartTime, npr.EndTime, &sr.Results[i].ServerMetrics) + metrics.VSwitchMem(npr.ServerNodeInfo, pcon, npr.StartTime, npr.EndTime, &sr.Results[i].ServerMetrics) sr.Results[i].ClientPodCPU, _ = metrics.TopPodCPU(npr.ClientNodeInfo, pcon, npr.StartTime, npr.EndTime) sr.Results[i].ServerPodCPU, _ = metrics.TopPodCPU(npr.ServerNodeInfo, pcon, npr.StartTime, npr.EndTime) sr.Results[i].ClientPodMem, _ = metrics.TopPodMem(npr.ClientNodeInfo, pcon, npr.StartTime, npr.EndTime) diff --git a/pkg/archive/archive.go b/pkg/archive/archive.go index d575ecb..ac88b68 100644 --- a/pkg/archive/archive.go +++ b/pkg/archive/archive.go @@ -19,38 +19,42 @@ const ltcyMetric = "usec" // Doc struct of the JSON document to be indexed type Doc struct { - UUID string `json:"uuid"` - Timestamp time.Time `json:"timestamp"` - 
HostNetwork bool `json:"hostNetwork"` - Driver string `json:"driver"` - Parallelism int `json:"parallelism"` - Profile string `json:"profile"` - Duration int `json:"duration"` - Service bool `json:"service"` - Local bool `json:"local"` - Virt bool `json:"virt"` - AcrossAZ bool `json:"acrossAZ"` - Samples int `json:"samples"` - Messagesize int `json:"messageSize"` - Burst int `json:"burst"` - Throughput float64 `json:"throughput"` - Latency float64 `json:"latency"` - TputMetric string `json:"tputMetric"` - LtcyMetric string `json:"ltcyMetric"` - TCPRetransmit float64 `json:"tcpRetransmits"` - UDPLossPercent float64 `json:"udpLossPercent"` - ToolVersion string `json:"toolVersion"` - ToolGitCommit string `json:"toolGitCommit"` - Metadata result.Metadata `json:"metadata"` - ServerNodeCPU metrics.NodeCPU `json:"serverCPU"` - ServerPodCPU []metrics.PodCPU `json:"serverPods"` - ServerPodMem []metrics.PodMem `json:"serverPodsMem"` - ClientNodeCPU metrics.NodeCPU `json:"clientCPU"` - ClientPodCPU []metrics.PodCPU `json:"clientPods"` - ClientPodMem []metrics.PodMem `json:"clientPodsMem"` - Confidence []float64 `json:"confidence"` - ServerNodeInfo metrics.NodeInfo `json:"serverNodeInfo"` - ClientNodeInfo metrics.NodeInfo `json:"clientNodeInfo"` + UUID string `json:"uuid"` + Timestamp time.Time `json:"timestamp"` + HostNetwork bool `json:"hostNetwork"` + Driver string `json:"driver"` + Parallelism int `json:"parallelism"` + Profile string `json:"profile"` + Duration int `json:"duration"` + Service bool `json:"service"` + Local bool `json:"local"` + Virt bool `json:"virt"` + AcrossAZ bool `json:"acrossAZ"` + Samples int `json:"samples"` + Messagesize int `json:"messageSize"` + Burst int `json:"burst"` + Throughput float64 `json:"throughput"` + Latency float64 `json:"latency"` + TputMetric string `json:"tputMetric"` + LtcyMetric string `json:"ltcyMetric"` + TCPRetransmit float64 `json:"tcpRetransmits"` + UDPLossPercent float64 `json:"udpLossPercent"` + ToolVersion string 
`json:"toolVersion"` + ToolGitCommit string `json:"toolGitCommit"` + Metadata result.Metadata `json:"metadata"` + ServerNodeCPU metrics.NodeCPU `json:"serverCPU"` + ServerPodCPU []metrics.PodCPU `json:"serverPods"` + ServerPodMem []metrics.PodMem `json:"serverPodsMem"` + ClientNodeCPU metrics.NodeCPU `json:"clientCPU"` + ClientPodCPU []metrics.PodCPU `json:"clientPods"` + ClientPodMem []metrics.PodMem `json:"clientPodsMem"` + Confidence []float64 `json:"confidence"` + ServerNodeInfo metrics.NodeInfo `json:"serverNodeInfo"` + ClientNodeInfo metrics.NodeInfo `json:"clientNodeInfo"` + ServerVSwitchCpu float64 `json:"serverVswitchCpu"` + ServerVSwitchMem float64 `json:"serverVswitchMem"` + ClientVSwitchCpu float64 `json:"clientVswitchCpu"` + ClientVSwitchMem float64 `json:"clientVswitchMem"` } // Connect returns a client connected to the desired cluster. @@ -91,33 +95,37 @@ func BuildDocs(sr result.ScenarioResults, uuid string) ([]interface{}, error) { } c := []float64{lo, hi} d := Doc{ - UUID: uuid, - Timestamp: time, - ToolVersion: sr.Version, - ToolGitCommit: sr.GitCommit, - Driver: r.Driver, - HostNetwork: r.HostNetwork, - Parallelism: r.Parallelism, - Profile: r.Profile, - Duration: r.Duration, - Virt: sr.Virt, - Samples: r.Samples, - Service: r.Service, - Messagesize: r.MessageSize, - Burst: r.Burst, - TputMetric: r.Metric, - LtcyMetric: ltcyMetric, - ServerNodeCPU: r.ServerMetrics, - ClientNodeCPU: r.ClientMetrics, - ServerPodCPU: r.ServerPodCPU.Results, - ServerPodMem: r.ServerPodMem.MemResults, - ClientPodMem: r.ClientPodMem.MemResults, - ClientPodCPU: r.ClientPodCPU.Results, - Metadata: sr.Metadata, - AcrossAZ: r.AcrossAZ, - Confidence: c, - ClientNodeInfo: r.ClientNodeInfo, - ServerNodeInfo: r.ServerNodeInfo, + UUID: uuid, + Timestamp: time, + ToolVersion: sr.Version, + ToolGitCommit: sr.GitCommit, + Driver: r.Driver, + HostNetwork: r.HostNetwork, + Parallelism: r.Parallelism, + Profile: r.Profile, + Duration: r.Duration, + Virt: sr.Virt, + Samples: 
r.Samples, + Service: r.Service, + Messagesize: r.MessageSize, + Burst: r.Burst, + TputMetric: r.Metric, + LtcyMetric: ltcyMetric, + ServerNodeCPU: r.ServerMetrics, + ClientNodeCPU: r.ClientMetrics, + ServerPodCPU: r.ServerPodCPU.Results, + ServerPodMem: r.ServerPodMem.MemResults, + ClientPodMem: r.ClientPodMem.MemResults, + ClientPodCPU: r.ClientPodCPU.Results, + ClientVSwitchCpu: r.ClientMetrics.VSwitchCPU, + ClientVSwitchMem: r.ClientMetrics.VSwitchMem, + ServerVSwitchCpu: r.ServerMetrics.VSwitchCPU, + ServerVSwitchMem: r.ServerMetrics.VSwitchMem, + Metadata: sr.Metadata, + AcrossAZ: r.AcrossAZ, + Confidence: c, + ClientNodeInfo: r.ClientNodeInfo, + ServerNodeInfo: r.ServerNodeInfo, } UDPLossPercent, e := result.Average(r.LossSummary) if e != nil { @@ -193,7 +201,7 @@ func commonCsvDataFields(row result.Data) []string { } // Writes all the mertics to the archive. -func writeArchive(cpuarchive, podarchive, podmemarchive *csv.Writer, role string, row result.Data, podResults []metrics.PodCPU, podMem []metrics.PodMem) error { +func writeArchive(vswitch, cpuarchive, podarchive, podmemarchive *csv.Writer, role string, row result.Data, podResults []metrics.PodCPU, podMem []metrics.PodMem) error { roleFieldData := []string{role} for _, pod := range podResults { if err := podarchive.Write(append(append(roleFieldData, @@ -218,6 +226,12 @@ func writeArchive(cpuarchive, podarchive, podmemarchive *csv.Writer, role string if role == "Server" { cpu = row.ServerMetrics } + if err := vswitch.Write(append(append(roleFieldData, + commonCsvDataFields(row)...), + fmt.Sprintf("%f", cpu.VSwitchCPU), + fmt.Sprintf("%f", cpu.VSwitchMem))); err != nil { + return fmt.Errorf("failed to write vswitch archive to file") + } if err := cpuarchive.Write(append(append(roleFieldData, commonCsvDataFields(row)...), fmt.Sprintf("%f", cpu.Idle), @@ -237,6 +251,11 @@ func writeArchive(cpuarchive, podarchive, podmemarchive *csv.Writer, role string func WritePromCSVResult(r result.ScenarioResults) error { d := 
time.Now().Unix() + vswitchfp, err := os.Create(fmt.Sprintf("vswitch-result-%d.csv", d)) + if err != nil { + return fmt.Errorf("failed to open vswitch archive file") + } + defer vswitchfp.Close() podmemfp, err := os.Create(fmt.Sprintf("podmem-result-%d.csv", d)) if err != nil { return fmt.Errorf("failed to open pod mem archive file") @@ -252,13 +271,15 @@ func WritePromCSVResult(r result.ScenarioResults) error { return fmt.Errorf("failed to open cpu archive file") } defer cpufp.Close() + vswitch := csv.NewWriter(vswitchfp) + defer vswitch.Flush() cpuarchive := csv.NewWriter(cpufp) defer cpuarchive.Flush() podarchive := csv.NewWriter(podfp) defer podarchive.Flush() - podmemarchive := csv.NewWriter(podmemfp) defer podmemarchive.Flush() + roleField := []string{"Role"} cpudata := append(append(roleField, commonCsvHeaderFields()...), @@ -275,17 +296,28 @@ func WritePromCSVResult(r result.ScenarioResults) error { "Pod Name", "Utilization", ) + vswitchdata := append(append(roleField, + commonCsvHeaderFields()...), + "CPU Utilization", + "Memory Utilization", + ) if err := cpuarchive.Write(cpudata); err != nil { return fmt.Errorf("failed to write cpu archive to file") } if err := podarchive.Write(poddata); err != nil { return fmt.Errorf("failed to write pod archive to file") } + if err := podmemarchive.Write(poddata); err != nil { + return fmt.Errorf("failed to write pod mem archive to file") + } + if err := vswitch.Write(vswitchdata); err != nil { + return fmt.Errorf("failed to write vswitch archive to file") + } for _, row := range r.Results { - if err := writeArchive(cpuarchive, podarchive, podmemarchive, "Client", row, row.ClientPodCPU.Results, row.ClientPodMem.MemResults); err != nil { + if err := writeArchive(vswitch, cpuarchive, podarchive, podmemarchive, "Client", row, row.ClientPodCPU.Results, row.ClientPodMem.MemResults); err != nil { return err } - if err := writeArchive(cpuarchive, podarchive, podmemarchive, "Server", row, row.ServerPodCPU.Results, 
row.ServerPodMem.MemResults); err != nil { + if err := writeArchive(vswitch, cpuarchive, podarchive, podmemarchive, "Server", row, row.ServerPodCPU.Results, row.ServerPodMem.MemResults); err != nil { return err } } diff --git a/pkg/metrics/system.go b/pkg/metrics/system.go index e0a8681..34ab9f4 100644 --- a/pkg/metrics/system.go +++ b/pkg/metrics/system.go @@ -177,7 +177,7 @@ func QueryNodeCPU(node NodeInfo, conn PromConnect, start time.Time, end time.Tim // TopPodCPU will return the top 5 CPU consumers for a specific node func TopPodCPU(node NodeInfo, conn PromConnect, start time.Time, end time.Time) (PodValues, bool) { var pods PodValues - query := fmt.Sprintf("topk(5,sum(irate(container_cpu_usage_seconds_total{name!=\"\",instance=~\"%s:.*\"}[2m]) * 100) by (pod, namespace, instance))", node.IP) + query := fmt.Sprintf("topk(10,sum(irate(container_cpu_usage_seconds_total{name!=\"\",instance=~\"%s:.*\"}[2m]) * 100) by (pod, namespace, instance))", node.IP) logging.Debugf("Prom Query : %s", query) val, err := conn.Client.QueryRange(query, start, end, time.Minute) if err != nil { @@ -230,7 +230,7 @@ func VSwitchMem(node NodeInfo, conn PromConnect, start time.Time, end time.Time, // TopPodMem will return the top 5 Mem consumers for a specific node func TopPodMem(node NodeInfo, conn PromConnect, start time.Time, end time.Time) (PodValues, bool) { var pods PodValues - query := fmt.Sprintf("topk(5,sum(container_memory_rss{container!=\"POD\",name!=\"\",node=~\"%s\"}) by (pod, namespace, node))", node.NodeName) + query := fmt.Sprintf("topk(10,sum(container_memory_rss{container!=\"POD\",name!=\"\",node=~\"%s\"}) by (pod, namespace, node))", node.NodeName) logging.Debugf("Prom Query : %s", query) val, err := conn.Client.QueryRange(query, start, end, time.Minute) if err != nil {