diff --git a/metrics/grafana/pd.json b/metrics/grafana/pd.json index fdb66ec7ffa7..ffa6d796528d 100644 --- a/metrics/grafana/pd.json +++ b/metrics/grafana/pd.json @@ -63,173 +63,62 @@ "editable": true, "gnetId": null, "graphTooltip": 1, - "id": 25, - "iteration": 1618283470402, + "id": 19, + "iteration": 1696750493301, "links": [], "panels": [ { "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], "datasource": "${DS_TEST-CLUSTER}", - "description": "It indicates whether the current PD is the leader or a follower.", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 0, - "y": 0 - }, - "id": 55, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "1", - "text": "Leader", - "to": "100000" - }, - { - "from": "0", - "text": "Follower", - "to": "1" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "pd_tso_role{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=\"$instance\", dc=\"global\"}", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "metric": "pd_server_tso", - "refId": "A", - "step": 40 - } - ], - "thresholds": "", - "title": "PD role", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "Unknown", - "value": "null" - }, - { - "op": "=", - "text": "Follower", - "value": "0" - }, - { - "op": "=", - "text": "Leader", - "value": "1" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_TEST-CLUSTER}", - "decimals": null, "description": "The total capacity size of the cluster", - "editable": true, - "error": false, - "format": "decbytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": false + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "unit": "decbytes" + } }, "gridPos": { "h": 6, "w": 4, - "x": 4, + "x": 0, "y": 0 }, "id": 10, "interval": null, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "nullPointMode": "null", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(77, 135, 25, 0.18)", - "full": true, - "lineColor": "rgb(21, 179, 65)", - "show": false + "options": { + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" }, - "tableColumn": "", + "pluginVersion": "7.5.11", "targets": [ { "expr": "sum(pd_cluster_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=\"$instance\",type=\"storage_capacity\"})", @@ -239,83 +128,64 @@ "step": 40 } ], - "thresholds": "", "title": "Storage capacity", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" + "type": "stat" }, { "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, "description": "The current storage size of the cluster", - "editable": true, - "error": false, - "format": "decbytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "decimals": 1, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "unit": "decbytes" + }, + "overrides": [] }, "gridPos": { "h": 6, "w": 4, - "x": 8, + "x": 4, "y": 0 }, "hideTimeOverride": false, "id": 38, "interval": null, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "nullPointMode": "null", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": true, - "lineColor": "rgb(31, 120, 193)", - "show": false + "options": { + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" }, - "tableColumn": "", + "pluginVersion": "7.5.11", "targets": [ { "expr": "sum(pd_cluster_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=\"$instance\",type=\"storage_size\"})", @@ -325,18 +195,8 @@ "step": 40 } ], - "thresholds": "", "title": "Current storage size", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" + "type": "stat" }, { "cacheTimeout": null, @@ -362,7 +222,7 @@ "gridPos": { "h": 6, "w": 4, - "x": 12, + "x": 8, "y": 0 }, "hideTimeOverride": false, @@ -425,147 +285,78 @@ }, { "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#d44a3a", - "rgba(237, 129, 40, 0.89)", - "#299c46" - ], "datasource": "${DS_TEST-CLUSTER}", "description": "The count of healthy stores", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, "gridPos": { "h": 6, "w": 4, - "x": 16, + "x": 12, "y": 0 }, "id": 97, "interval": null, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "options": { + "graphMode": "none", + "justifyMode": "auto", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" }, - "tableColumn": "", + "pluginVersion": "7.5.11", "targets": [ { + "exemplar": true, "expr": "sum(pd_cluster_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=\"$instance\", type=\"store_up_count\"})", "format": "time_series", + "interval": "", "intervalFactor": 2, + "legendFormat": "", "refId": "A" } ], - "thresholds": "0,1", + "timeFrom": null, + "timeShift": null, "title": "Normal stores", - "type": "singlestat", - "valueFontSize": "100%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" + "type": "stat" }, { "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], "datasource": "${DS_TEST-CLUSTER}", "description": "The total number of Regions without replicas", - "editable": true, - "error": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": false - }, "gridPos": { "h": 6, "w": 4, - "x": 20, + "x": 16, "y": 0 }, "id": 20, "interval": null, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "nullPointMode": "null", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": true, - "lineColor": "rgb(31, 120, 193)", - "show": true + "options": { + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" }, - "tableColumn": "", + "pluginVersion": "7.5.11", "targets": [ { "expr": "sum(pd_cluster_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=\"$instance\",type=\"leader_count\"})", @@ -575,18 +366,61 @@ "step": 40 } ], - "thresholds": "", "title": "Number of Regions", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ + "type": "stat" + }, + { + "datasource": "${DS_TEST-CLUSTER}", + "description": "The current peer count of the cluster", + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + } + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 0 + }, + "id": 18, + "links": [], + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "7.5.11", + "targets": [ { - "op": "=", - "text": "N/A", - "value": "null" + "expr": "sum(pd_cluster_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=\"$instance\", type=\"region_count\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "count", + "refId": "A", + "step": 4 } ], - "valueName": "current" + "timeFrom": null, + "timeShift": null, + "title": "Current peer count", + "type": "stat" }, { "columns": [ @@ -702,7 +536,7 @@ "timeFrom": "1s", "title": "Abnormal stores", "transform": "timeseries_aggregations", - "type": "table" + "type": "table-old" }, { "alert": { @@ -774,6 +608,7 @@ "nullPointMode": "null", "paceLength": 10, "percentage": false, + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -804,8 +639,7 @@ "fill": true, "line": true, "op": "gt", - "value": 100, - "yaxis": "left" + "value": 100 } ], "timeFrom": null, @@ -849,99 +683,68 @@ } }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, + "cacheTimeout": null, "datasource": "${DS_TEST-CLUSTER}", - "decimals": null, - "description": "The current peer count of the cluster", - "editable": true, - "error": false, - "fill": 0, - "fillGradient": 0, - "grid": {}, + "description": "It indicates the current leader/primary of services", "gridPos": { "h": 7, "w": 8, "x": 16, "y": 6 }, - "hiddenSeries": false, - "id": 18, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 3, + "id": 55, + "interval": null, "links": [], - "nullPointMode": "null", - "paceLength": 10, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": true, + "maxDataPoints": 100, + "options": { + "showHeader": true + }, + "pluginVersion": "7.5.11", + "tableColumn": "instance", "targets": [ { - "expr": "sum(pd_cluster_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=\"$instance\", type=\"region_count\"})", - "format": "time_series", + "exemplar": true, + "expr": "service_member_role{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "format": "table", + "instant": true, + "interval": "", "intervalFactor": 2, - "legendFormat": "count", + "legendFormat": "", + "metric": "pd_server_tso", "refId": "A", - "step": 4 + "step": 40 } ], - "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Current peer count", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "title": "Leader/Primary", + "transformations": [ { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "instance", + "service" + ] + } + } }, { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "instance": 1, + "service": 0 + }, + "renameByName": { + "instance": "" + } + } } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "table" }, { "datasource": "${DS_TEST-CLUSTER}", @@ -1060,6 +863,26 @@ }, { "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": { + "custom": { + "align": "left", + "filterable": false + }, + "mappings": [ + { + "from": "", + "id": 1, + "text": "N/A", + "to": "", + "type": 1, + "value": "1.7976931348623157e+308" + } + ], + "unit": "s" + }, + "overrides": [] + }, "gridPos": { "h": 6, "w": 4, @@ -1073,37 +896,18 @@ "pluginVersion": "7.5.11", "targets": [ { + "exemplar": true, "expr": "pd_cluster_eta{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=\"$instance\"}", - "legendFormat": "{{action}}-{{store}}", + "instant": true, "interval": "", - "exemplar": true, + "legendFormat": "{{action}}-{{store}}", "queryType": "randomWalk", - "refId": "A", - "instant": true + "refId": "A" } ], + "timeFrom": null, + "timeShift": null, "title": "Left time", - "type": "table", - "fieldConfig": { - "defaults": { - "custom": { - "align": "left", - "filterable": false - }, - "mappings": [ - { - "id": 1, - "type": 1, - "from": "", - "to": "", - "text": "N/A", - "value": "1.7976931348623157e+308" - } - ], - "unit": "s" - }, - "overrides": [] - }, "transformations": [ { "id": "reduce", @@ -1114,13 +918,21 @@ } } ], - "timeFrom": null, - "timeShift": null + "type": "table" }, { - "type": "table", - "title": "Current scaling speed", "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": { + "custom": { + "align": "left", + "filterable": false + }, + "mappings": [], + "unit": "MBs" + }, + "overrides": [] + }, "gridPos": { "h": 6, "w": 4, @@ -1128,31 +940,24 @@ "y": 13 }, "id": 1112, + "options": { + "showHeader": false + }, + "pluginVersion": "7.5.11", "targets": [ { + "exemplar": true, "expr": "pd_cluster_speed{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=\"$instance\"}", - "legendFormat": "{{action}}-{{store}}", + "instant": true, "interval": "", - "exemplar": true, - "refId": "A", + "legendFormat": "{{action}}-{{store}}", "queryType": "randomWalk", - "instant": true + "refId": "A" } ], - "options": { - "showHeader": false - }, - "fieldConfig": { - "defaults": { - "custom": { - "align": "left", - "filterable": false - }, - "mappings": [], - "unit": "MBs" - }, - "overrides": [] - }, + "timeFrom": null, + "timeShift": null, + "title": "Current scaling speed", "transformations": [ { "id": "reduce", @@ -1163,11 +968,11 @@ } } ], - "timeFrom": null, - "timeShift": null + "type": "table" }, { "collapsed": true, + "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -1238,7 +1043,7 @@ "timeFrom": "1s", "title": "PD scheduler config", "transform": "timeseries_aggregations", - "type": "table" + "type": "table-old" }, { "columns": [], @@ -1361,7 +1166,7 @@ "timeShift": null, "title": "Store limit", "transform": "table", - "type": "table" + "type": "table-old" }, { "cacheTimeout": null, @@ -1426,7 +1231,7 @@ "timeShift": null, "title": "Cluster ID", "transform": "timeseries_aggregations", - "type": "table" + "type": "table-old" }, { "columns": [ @@ -1506,7 +1311,7 @@ "timeShift": null, "title": "Region label isolation level", "transform": "timeseries_aggregations", - "type": "table" + "type": "table-old" }, { "columns": [ @@ -1593,7 +1398,7 @@ "timeFrom": "1s", "title": "Label distribution", "transform": "timeseries_aggregations", - "type": "table" + "type": "table-old" }, { "cacheTimeout": null, @@ -2417,7 +2222,6 @@ }, "yaxes": [ { - "$$hashKey": "object:533", "format": "short", "label": null, "logBase": 1, @@ -2426,7 +2230,6 @@ "show": true }, { - "$$hashKey": "object:534", "format": "short", "label": null, "logBase": 1, @@ -2539,6 +2342,7 @@ }, { "collapsed": true, + "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -3508,6 +3312,7 @@ }, { "collapsed": true, + "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -4711,6 +4516,7 @@ }, { "collapsed": true, + "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -5957,6 +5763,7 @@ }, { "collapsed": true, + "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -6940,6 +6747,7 @@ }, { "collapsed": true, + "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -8337,6 +8145,7 @@ }, { "collapsed": true, + "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -8551,6 +8360,7 @@ }, { "collapsed": true, + "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -8761,6 +8571,7 @@ }, { "collapsed": true, + "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -8971,6 +8782,7 @@ }, { "collapsed": true, + "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -9033,7 +8845,7 @@ "hide": false, "intervalFactor": 1, "legendFormat": "{{instance}}-logically-in-use", - "refId": "B" + "refId": "B" } ], "thresholds": [], @@ -10070,6 +9882,7 @@ }, { "collapsed": true, + "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -10365,10 +10178,10 @@ { "expr": "sum(rate(pd_tso_events{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (type,dc)", "format": "time_series", + "hide": true, "interval": "", "intervalFactor": 2, "legendFormat": "{{type}}-{{dc}}", - "hide": true, "refId": "B", "step": 2 } @@ -10536,6 +10349,7 @@ }, { "collapsed": true, + "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -10789,7 +10603,6 @@ "renderer": "flot", "seriesOverrides": [ { - "$$hashKey": "object:363", "alias": "/pending.*/", "yaxis": 2 } @@ -10836,7 +10649,6 @@ }, "yaxes": [ { - "$$hashKey": "object:307", "format": "ops", "label": null, "logBase": 1, @@ -10845,7 +10657,6 @@ "show": true }, { - "$$hashKey": "object:308", "format": "short", "label": null, "logBase": 1, @@ -11725,6 +11536,7 @@ }, { "collapsed": true, + "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -12197,6 +12009,7 @@ }, { "collapsed": true, + "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -12988,16 +12801,24 @@ "type": "row" } ], - "refresh": "30s", - "schemaVersion": 18, + "refresh": "5s", + "schemaVersion": 27, "style": "dark", "tags": [], "templating": { "list": [ { "allValue": null, - "current": {}, + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, "datasource": "${DS_TEST-CLUSTER}", + "definition": "", + "description": null, + "error": null, "hide": 2, "includeAll": false, "label": "K8s-cluster", @@ -13007,6 +12828,7 @@ "query": "label_values(pd_cluster_status, k8s_cluster)", "refresh": 2, "regex": "", + "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [], @@ -13018,11 +12840,14 @@ "allValue": null, "current": { "isNone": true, + "selected": false, "text": "None", "value": "" }, "datasource": "${DS_TEST-CLUSTER}", "definition": "", + "description": null, + "error": null, "hide": 2, "includeAll": false, "label": "tidb_cluster", @@ -13048,6 +12873,8 @@ }, "datasource": "${DS_TEST-CLUSTER}", "definition": "", + "description": null, + "error": null, "hide": 0, "includeAll": false, "label": null, @@ -13068,11 +12895,18 @@ { "allValue": ".*", "current": { - "text": "All", - "value": "$__all" + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] }, "datasource": "${DS_TEST-CLUSTER}", "definition": "label_values(pd_scheduler_store_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}, store)", + "description": null, + "error": null, "hide": 0, "includeAll": true, "label": "store", diff --git a/pkg/mcs/resourcemanager/server/server.go b/pkg/mcs/resourcemanager/server/server.go index 9b9bd91c6eb2..dbc60ef9489d 100644 --- a/pkg/mcs/resourcemanager/server/server.go +++ b/pkg/mcs/resourcemanager/server/server.go @@ -52,6 +52,8 @@ import ( var _ bs.Server = (*Server)(nil) +const serviceName = "Resource Manager" + // Server is the resource manager server, and it implements bs.Server. type Server struct { *server.BaseServer @@ -168,6 +170,7 @@ func (s *Server) campaignLeader() { defer resetLeaderOnce.Do(func() { cancel() s.participant.ResetLeader() + member.ServiceMemberGauge.WithLabelValues(serviceName).Set(0) }) // maintain the leadership, after this, Resource Manager could be ready to provide service. @@ -180,6 +183,7 @@ func (s *Server) campaignLeader() { } s.participant.EnableLeader() + member.ServiceMemberGauge.WithLabelValues(serviceName).Set(1) log.Info("resource manager primary is ready to serve", zap.String("resource-manager-primary-name", s.participant.Name())) leaderTicker := time.NewTicker(utils.LeaderTickInterval) @@ -382,8 +386,8 @@ func CreateServerWrapper(cmd *cobra.Command, args []string) { // Flushing any buffered log entries defer log.Sync() - versioninfo.Log("Resource Manager") - log.Info("Resource Manager config", zap.Reflect("config", cfg)) + versioninfo.Log(serviceName) + log.Info("Resource manager config", zap.Reflect("config", cfg)) grpcprometheus.EnableHandlingTimeHistogram() metricutil.Push(&cfg.Metric) diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index 4ec2f2731e79..ee4b58854233 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -62,7 +62,11 @@ import ( var _ bs.Server = (*Server)(nil) -const memberUpdateInterval = time.Minute +const ( + serviceName = "Scheduling Service" + + memberUpdateInterval = time.Minute +) // Server is the scheduling server, and it implements bs.Server. type Server struct { @@ -255,6 +259,7 @@ func (s *Server) campaignLeader() { defer resetLeaderOnce.Do(func() { cancel() s.participant.ResetLeader() + member.ServiceMemberGauge.WithLabelValues(serviceName).Set(0) }) // maintain the leadership, after this, Scheduling could be ready to provide service. @@ -274,6 +279,7 @@ func (s *Server) campaignLeader() { } }() s.participant.EnableLeader() + member.ServiceMemberGauge.WithLabelValues(serviceName).Set(1) log.Info("scheduling primary is ready to serve", zap.String("scheduling-primary-name", s.participant.Name())) leaderTicker := time.NewTicker(utils.LeaderTickInterval) @@ -531,8 +537,8 @@ func CreateServerWrapper(cmd *cobra.Command, args []string) { // Flushing any buffered log entries defer log.Sync() - versioninfo.Log("Scheduling") - log.Info("Scheduling config", zap.Reflect("config", cfg)) + versioninfo.Log(serviceName) + log.Info("Scheduling service config", zap.Reflect("config", cfg)) grpcprometheus.EnableHandlingTimeHistogram() metricutil.Push(&cfg.Metric) diff --git a/pkg/mcs/tso/server/server.go b/pkg/mcs/tso/server/server.go index 133f87b78f37..16ef3216c629 100644 --- a/pkg/mcs/tso/server/server.go +++ b/pkg/mcs/tso/server/server.go @@ -58,6 +58,8 @@ import ( var _ bs.Server = (*Server)(nil) var _ tso.ElectionMember = (*member.Participant)(nil) +const serviceName = "TSO Service" + // Server is the TSO server, and it implements bs.Server. type Server struct { *server.BaseServer @@ -450,8 +452,8 @@ func CreateServerWrapper(cmd *cobra.Command, args []string) { // Flushing any buffered log entries defer log.Sync() - versioninfo.Log("TSO") - log.Info("TSO config", zap.Reflect("config", cfg)) + versioninfo.Log(serviceName) + log.Info("TSO service config", zap.Reflect("config", cfg)) grpcprometheus.EnableHandlingTimeHistogram() metricutil.Push(&cfg.Metric) diff --git a/pkg/member/metrics.go b/pkg/member/metrics.go new file mode 100644 index 000000000000..d2b99f0cf932 --- /dev/null +++ b/pkg/member/metrics.go @@ -0,0 +1,32 @@ +// Copyright 2023 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package member + +import "github.com/prometheus/client_golang/prometheus" + +var ( + // ServiceMemberGauge is used to record the leader/primary of services. + ServiceMemberGauge = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "service", + Subsystem: "member", + Name: "role", + Help: "The leader/primary of services", + }, []string{"service"}) +) + +func init() { + prometheus.MustRegister(ServiceMemberGauge) +} diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go index aaa29d41516b..613ceb3eafc6 100644 --- a/pkg/tso/global_allocator.go +++ b/pkg/tso/global_allocator.go @@ -29,6 +29,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/tikv/pd/pkg/errs" mcsutils "github.com/tikv/pd/pkg/mcs/utils" + "github.com/tikv/pd/pkg/member" "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/utils/logutil" @@ -619,10 +620,13 @@ func (gta *GlobalTSOAllocator) campaignLeader() { gta.am.ResetAllocatorGroup(GlobalDCLocation) }() + tsoLabel := fmt.Sprintf("TSO Service Group %d", gta.getGroupID()) gta.member.EnableLeader() + member.ServiceMemberGauge.WithLabelValues(tsoLabel).Set(1) defer resetLeaderOnce.Do(func() { cancel() gta.member.ResetLeader() + member.ServiceMemberGauge.WithLabelValues(tsoLabel).Set(0) }) // TODO: if enable-local-tso is true, check the cluster dc-location after the primary is elected diff --git a/server/server.go b/server/server.go index aa3ef12c4f5b..c15e0156db00 100644 --- a/server/server.go +++ b/server/server.go @@ -102,7 +102,7 @@ const ( // PDMode represents that server is in PD mode. PDMode = "PD" // APIServiceMode represents that server is in API service mode. - APIServiceMode = "API service" + APIServiceMode = "API Service" // maxRetryTimesGetServicePrimary is the max retry times for getting primary addr. // Note: it need to be less than client.defaultPDTimeout @@ -1721,6 +1721,7 @@ func (s *Server) campaignLeader() { } // EnableLeader to accept the remaining service, such as GetStore, GetRegion. s.member.EnableLeader() + member.ServiceMemberGauge.WithLabelValues(s.mode).Set(1) if !s.IsAPIServiceMode() { // Check the cluster dc-location after the PD leader is elected. go s.tsoAllocatorManager.ClusterDCLocationChecker() @@ -1730,6 +1731,7 @@ func (s *Server) campaignLeader() { // to be new leader. cancel() s.member.ResetLeader() + member.ServiceMemberGauge.WithLabelValues(s.mode).Set(0) }) CheckPDVersion(s.persistOptions)