diff --git a/tools/pd-heartbeat-bench/main.go b/tools/pd-heartbeat-bench/main.go index 16c725155552..46dd234bdfef 100644 --- a/tools/pd-heartbeat-bench/main.go +++ b/tools/pd-heartbeat-bench/main.go @@ -484,7 +484,7 @@ func main() { log.Fatal("initialize logger error", zap.Error(err)) } - metrics.InitMetric2Collect(cfg.MetricsAddr) + withMetric := metrics.InitMetric2Collect(cfg.MetricsAddr) maxVersion = cfg.InitEpochVer options := config.NewOptions(cfg) // let PD have enough time to start @@ -548,7 +548,9 @@ func main() { wg.Add(1) go regions.handleRegionHeartbeat(wg, streams[id], id, rep) } - go metrics.CollectMetrics(regions.updateRound, 1*time.Second) + if withMetric { + metrics.CollectMetrics(regions.updateRound, time.Second) + } wg.Wait() since := time.Since(startTime).Seconds() @@ -695,7 +697,7 @@ func runHTTPServer(cfg *config.Config, options *config.Options) { c.IndentedJSON(http.StatusOK, output) }) - engine.GET("metrics_collect", func(c *gin.Context) { + engine.GET("metrics-collect", func(c *gin.Context) { second := c.Query("second") if second == "" { c.String(http.StatusBadRequest, "missing second") @@ -706,7 +708,6 @@ func runHTTPServer(cfg *config.Config, options *config.Options) { c.String(http.StatusBadRequest, "invalid second") return } - metrics.InitMetric2Collect(cfg.MetricsAddr) metrics.CollectMetrics(metrics.WarmUpRound, time.Duration(secondInt)*time.Second) c.IndentedJSON(http.StatusOK, "Successfully collect metrics") }) diff --git a/tools/pd-heartbeat-bench/metrics/util.go b/tools/pd-heartbeat-bench/metrics/util.go index b7883fb354bc..96ecf0da15c9 100644 --- a/tools/pd-heartbeat-bench/metrics/util.go +++ b/tools/pd-heartbeat-bench/metrics/util.go @@ -50,7 +50,7 @@ var ( memoryMetric = `max_over_time(go_memstats_heap_inuse_bytes{job=~".*pd.*"}[1h])/1024/1024/1024` goRoutineMetric = `max_over_time(go_goroutines{job=~".*pd.*"}[1h])` hbLatency99Metric = `histogram_quantile(0.99, sum(rate(pd_scheduler_handle_region_heartbeat_duration_seconds_bucket{}[1m])) by (le))` - hbLatencyAvgMetric = `sum(rate(pd_scheduler_handle_region_heartbeat_duration_seconds_sum{}[1m])) / sum(rate(pd_scheduler_handle_region_heartbeat_duration_seconds_count{}[1m])) * 1000` + hbLatencyAvgMetric = `sum(rate(pd_scheduler_handle_region_heartbeat_duration_seconds_sum{}[1m])) / sum(rate(pd_scheduler_handle_region_heartbeat_duration_seconds_count{}[1m]))` // Heartbeat Performance Duration BreakDown hbBreakdownName = "Heartbeat Performance Duration BreakDown (Accumulation)(ms)" @@ -76,7 +76,7 @@ type Metric struct { value float64 } -func InitMetric2Collect(endpoint string) { +func InitMetric2Collect(endpoint string) (withMetric bool) { for _, name := range breakdownNames { metrics2Collect = append(metrics2Collect, Metric{ promSQL: hbBreakdownMetricByName(name), @@ -91,13 +91,20 @@ func InitMetric2Collect(endpoint string) { cu, err := url.Parse(endpoint) if err != nil { log.Error("parse prometheus url error", zap.Error(err)) - return + return false } prometheusCli, err = NewPrometheusClient(*cu) if err != nil { log.Error("create prometheus client error", zap.Error(err)) - return + return false + } + // check whether the prometheus is available + _, err = getMetric(prometheusCli, goRoutineMetric, time.Now()) + if err != nil { + log.Error("check prometheus availability error, please check the prometheus address", zap.Error(err)) + return false } + return true } func NewPrometheusClient(prometheusURL url.URL) (api.Client, error) { @@ -111,7 +118,7 @@ func NewPrometheusClient(prometheusURL url.URL) (api.Client, error) { return client, nil } -// wait for the first round to warm up +// WarmUpRound wait for the first round to warm up const WarmUpRound = 1 func CollectMetrics(curRound int, wait time.Duration) {