From 074f590e3c52656a41ce56801676b53238b48c57 Mon Sep 17 00:00:00 2001 From: Justin Dyer Date: Tue, 3 Dec 2024 12:03:32 -0500 Subject: [PATCH 1/4] Added Hybrid Compute dataDiskReadLatency alert --- services/HybridCompute/machines/_index.md | 2 + services/HybridCompute/machines/alerts.yaml | 51 +++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 services/HybridCompute/machines/alerts.yaml diff --git a/services/HybridCompute/machines/_index.md b/services/HybridCompute/machines/_index.md index 5fe11e12d..6a6df8af8 100644 --- a/services/HybridCompute/machines/_index.md +++ b/services/HybridCompute/machines/_index.md @@ -3,3 +3,5 @@ title: machines geekdocCollapseSection: true geekdocHidden: false --- + +{{< alertList name="alertList" >}} diff --git a/services/HybridCompute/machines/alerts.yaml b/services/HybridCompute/machines/alerts.yaml new file mode 100644 index 000000000..adabf24f1 --- /dev/null +++ b/services/HybridCompute/machines/alerts.yaml @@ -0,0 +1,51 @@ +- name: HybridVMHighDataDiskReadLatencyAlert + description: Log Alert for Hybrid Virtual Machine dataDiskReadLatency + type: Log + verified: false + visible: true + tags: + - alz + properties: + severity: 2 + operator: GreaterThan + timeAggregation: Count + windowSize: PT15M + evaluationFrequency: PT5M + threshold: 30 + metricMeasureColumn: AggregatedValue + resouceIdColumn: _ResourceId + dimensions: + - name: Computer + operator: Include + values: + - '*' + - name: Disk + operator: Include + values: + - '*' + failingPeriods: + numberOfEvaluationPeriods: 1 + minFailingPeriodsToAlert: 1 + query: 'InsightsMetrics + | where _ResourceId has "Microsoft.HybridCompute/machines" + | where Origin == "vm.azm.ms" + | where Namespace == "LogicalDisk" and Name == "ReadLatencyMs" + | extend Disk=tostring(todynamic(Tags)["vm.azm.ms/mountId"]) + | where Disk !in ("C:", "/") + | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, Disk' + autoMitigate: true + 
autoResolve: true + autoResolveTime: 0:10:00 + references: + - name: 'Monitor virtual machines with Azure Monitor: Alerts' + url: https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules + deployments: + - name: Deploy Hybrid VM Data Disk Read Latency Alert + template: Deploy-Hybrid-VM-DataDiskReadLatency-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false + guid: 6aff7331-001f-4ee6-b5d2-1fc43b18b7de From 383062b7093c3c16f90b172f3e94288e5ecf1c91 Mon Sep 17 00:00:00 2001 From: Justin Dyer Date: Thu, 5 Dec 2024 10:12:48 -0500 Subject: [PATCH 2/4] Added initial alerts for Hybrid Machines --- services/HybridCompute/machines/alerts.yaml | 159 +++++++++++++++++++- 1 file changed, 155 insertions(+), 4 deletions(-) diff --git a/services/HybridCompute/machines/alerts.yaml b/services/HybridCompute/machines/alerts.yaml index adabf24f1..3aafc1171 100644 --- a/services/HybridCompute/machines/alerts.yaml +++ b/services/HybridCompute/machines/alerts.yaml @@ -1,5 +1,5 @@ -- name: HybridVMHighDataDiskReadLatencyAlert - description: Log Alert for Hybrid Virtual Machine dataDiskReadLatency +- name: Hybrid VM Data Disk Read Latency Alert + description: Log Alert for Hybrid Virtual Machine Data Disk Read Latency type: Log verified: false visible: true @@ -8,10 +8,10 @@ properties: severity: 2 operator: GreaterThan - timeAggregation: Count + timeAggregation: Average windowSize: PT15M evaluationFrequency: PT5M - threshold: 30 + threshold: 25 metricMeasureColumn: AggregatedValue resouceIdColumn: _ResourceId dimensions: @@ -49,3 +49,154 @@ scope: Subscription multiResource: false guid: 6aff7331-001f-4ee6-b5d2-1fc43b18b7de +- name: Hybrid VM Data Disk Free Space Percentage Alert + description: Log Alert for Hybrid VM Data Disk Free Space Percentage + type: Log + verified: false + visible: true + tags: + - alz + properties: + severity: 2 + operator: LessThan + timeAggregation: Average + 
windowSize: PT15M + evaluationFrequency: PT5M + threshold: 10 + metricMeasureColumn: AggregatedValue + resouceIdColumn: _ResourceId + dimensions: + - name: Computer + operator: Include + values: + - '*' + - name: Disk + operator: Include + values: + - '*' + failingPeriods: + numberOfEvaluationPeriods: 1 + minFailingPeriodsToAlert: 1 + query: 'InsightsMetrics + | where _ResourceId has "Microsoft.HybridCompute/machines" + | where Origin == "vm.azm.ms" + | where Namespace == "LogicalDisk"and Name == "FreeSpacePercentage" + | extend Disk=tostring(todynamic(Tags)["vm.azm.ms/mountId"]) + | where Disk !in ("C:","/") + | summarize AggregatedValue = avg(Val) by bin(TimeGenerated,15m), Computer,_ResourceId, Disk' + autoMitigate: true + autoResolve: true + autoResolveTime: 0:10:00 + references: + - name: 'Monitor virtual machines with Azure Monitor: Alerts' + url: https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules + deployments: + - name: Deploy Hybrid VM Data Disk Free Space Percentage Alert + template: Deploy-Hybrid-VM-DataDiskSpace-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false + guid: 2030d931-6431-4134-9a03-106ebb83cb2d +- name: Hybrid VM Data Disk Write Latency Alert + description: Log Alert for Hybrid Virtual Machine Data Disk Write Latency (ms) + type: Log + verified: false + visible: true + tags: + - alz + properties: + severity: 2 + operator: GreaterThan + timeAggregation: Average + windowSize: PT15M + evaluationFrequency: PT5M + threshold: 25 + metricMeasureColumn: AggregatedValue + resouceIdColumn: _ResourceId + dimensions: + - name: Computer + operator: Include + values: + - '*' + - name: Disk + operator: Include + values: + - '*' + failingPeriods: + numberOfEvaluationPeriods: 1 + minFailingPeriodsToAlert: 1 + query: 'InsightsMetrics + | where _ResourceId has "Microsoft.HybridCompute/machines" + | where Origin == "vm.azm.ms" + | where Namespace == 
"LogicalDisk" and Name == "WriteLatencyMs" + | extend Disk=tostring(todynamic(Tags)["vm.azm.ms/mountId"]) + | where Disk !in ("C:","/") + | summarize AggregatedValue = avg(Val) by bin(TimeGenerated,15m), Computer, _ResourceId, Disk' + autoMitigate: true + autoResolve: true + autoResolveTime: 0:10:00 + references: + - name: 'Monitor hybrid virtual machines with Azure Monitor: Alerts' + url: https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules + deployments: + - name: Deploy Hybrid VM Data Disk Write Latency Alert + template: Deploy-Hybrid-VM-DataDiskWriteLatency-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false + guid: c4226730-ae59-4607-bddc-03b91dad1c4b +- name: Hybrid VM Disconnected Alert + description: Log Alert for Hybrid Virtual Machine Disconnected + type: Log + verified: false + visible: true + properties: + severity: 1 + operator: GreaterThan + timeAggregation: Average + windowSize: P1D + evaluationFrequency: PT10M + threshold: 10 + metricMeasureColumn: AggregatedValue + resouceIdColumn: _ResourceId + dimensions: + - name: Computer + operator: Include + values: + - '*' + - name: Disk + operator: Include + values: + - '*' + failingPeriods: + numberOfEvaluationPeriods: 1 + minFailingPeriodsToAlert: 1 + query: 'resources + | where type == "microsoft.hybridcompute/machines" + | where tostring(properties.status) == "Disconnected" + | extend lastContactedDate = todatetime(properties.lastStatusChange) + | where lastContactedDate <= ago(totimespan(10m)) + | extend status = tostring(properties.status) + | project id, Computer=name, status, lastContactedDate' + autoMitigate: true + autoResolve: true + autoResolveTime: 0:10:00 + references: + - name: 'Monitor virtual machines with Azure Monitor: Alerts' + url: https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules + deployments: + - name: Deploy Hybrid VM 
Disconnected Alert + template: Deploy-Hybrid-VM-Disconnected-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false + guid: 34da0d5b-5ccb-474e-9811-3d5fdb81053c From 59300fce1fbc7850dbbaca3fec2506247c21923f Mon Sep 17 00:00:00 2001 From: Justin Dyer Date: Thu, 5 Dec 2024 10:14:27 -0500 Subject: [PATCH 3/4] Added initial alerts for Hybrid Machines --- services/HybridCompute/machines/alerts.yaml | 387 ++++++++++++++++++-- 1 file changed, 365 insertions(+), 22 deletions(-) diff --git a/services/HybridCompute/machines/alerts.yaml b/services/HybridCompute/machines/alerts.yaml index 3aafc1171..af41f6d5a 100644 --- a/services/HybridCompute/machines/alerts.yaml +++ b/services/HybridCompute/machines/alerts.yaml @@ -1,5 +1,5 @@ -- name: Hybrid VM Data Disk Read Latency Alert - description: Log Alert for Hybrid Virtual Machine Data Disk Read Latency +- name: Hybrid Machine Data Disk Read Latency Alert + description: Log Alert for Hybrid Machine Data Disk Read Latency type: Log verified: false visible: true @@ -40,7 +40,7 @@ - name: 'Monitor virtual machines with Azure Monitor: Alerts' url: https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules deployments: - - name: Deploy Hybrid VM Data Disk Read Latency Alert + - name: Deploy Hybrid Machine Data Disk Read Latency Alert template: Deploy-Hybrid-VM-DataDiskReadLatency-Alert.json type: Policy tags: @@ -49,8 +49,8 @@ scope: Subscription multiResource: false guid: 6aff7331-001f-4ee6-b5d2-1fc43b18b7de -- name: Hybrid VM Data Disk Free Space Percentage Alert - description: Log Alert for Hybrid VM Data Disk Free Space Percentage +- name: Hybrid Machine Data Disk Free Space Percentage Alert + description: Log Alert for Hybrid Machine Data Disk Free Space Percentage type: Log verified: false visible: true @@ -91,7 +91,7 @@ - name: 'Monitor virtual machines with Azure Monitor: Alerts' url: 
https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules deployments: - - name: Deploy Hybrid VM Data Disk Free Space Percentage Alert + - name: Deploy Hybrid Machine Data Disk Free Space Percentage Alert template: Deploy-Hybrid-VM-DataDiskSpace-Alert.json type: Policy tags: @@ -100,8 +100,8 @@ scope: Subscription multiResource: false guid: 2030d931-6431-4134-9a03-106ebb83cb2d -- name: Hybrid VM Data Disk Write Latency Alert - description: Log Alert for Hybrid Virtual Machine Data Disk Write Latency (ms) +- name: Hybrid Machine Data Disk Write Latency Alert + description: Log Alert for Hybrid Machine Data Disk Write Latency (ms) type: Log verified: false visible: true @@ -142,7 +142,7 @@ - name: 'Monitor hybrid virtual machines with Azure Monitor: Alerts' url: https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules deployments: - - name: Deploy Hybrid VM Data Disk Write Latency Alert + - name: Deploy Hybrid Machine Data Disk Write Latency Alert template: Deploy-Hybrid-VM-DataDiskWriteLatency-Alert.json type: Policy tags: @@ -151,8 +151,8 @@ scope: Subscription multiResource: false guid: c4226730-ae59-4607-bddc-03b91dad1c4b -- name: Hybrid VM Disconnected Alert - description: Log Alert for Hybrid Virtual Machine Disconnected +- name: Hybrid Machine Heartbeat Alert + description: Log Alert for Hybrid Machine Heartbeat type: Log verified: false visible: true @@ -160,8 +160,8 @@ severity: 1 operator: GreaterThan timeAggregation: Average - windowSize: P1D - evaluationFrequency: PT10M + windowSize: PT15M + evaluationFrequency: PT5M threshold: 10 metricMeasureColumn: AggregatedValue resouceIdColumn: _ResourceId @@ -177,13 +177,356 @@ failingPeriods: numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 - query: 'resources + query: 'Heartbeat | where type == "microsoft.hybridcompute/machines" - | where tostring(properties.status) == "Disconnected" - | extend 
lastContactedDate = todatetime(properties.lastStatusChange) - | where lastContactedDate <= ago(totimespan(10m)) - | extend status = tostring(properties.status) - | project id, Computer=name, status, lastContactedDate' + | summarize TimeGenerated=max(TimeGenerated) by Computer, _ResourceId + | extend Duration = datetime_diff("minute",now(),TimeGenerated) + | summarize AggregatedValue = min(Duration) by Computer, bin(TimeGenerated,5m), _ResourceId' + autoMitigate: true + autoResolve: true + autoResolveTime: 0:10:00 + references: + - name: 'Monitor virtual machines with Azure Monitor: Alerts' + url: https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules + deployments: + - name: Deploy Hybrid Machine HeartBeat Alert for all Machines in the subscription + template: Deploy-Hybrid-VM-HeartBeat-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false + guid: 7ade7362-3df1-4ad2-a000-78fa7b3d9b27 +- name: Hybrid Machine Network Read Alert + description: Log Alert for Hybrid Machine Network Read (bytes/sec) + type: Log + verified: false + visible: true + tags: + - alz + properties: + severity: 2 + operator: GreaterThan + timeAggregation: Average + windowSize: PT15M + evaluationFrequency: PT5M + threshold: 10000000 + metricMeasureColumn: AggregatedValue + resouceIdColumn: _ResourceId + dimensions: + - name: Computer + operator: Include + values: + - '*' + - name: Disk + operator: Include + values: + - '*' + failingPeriods: + numberOfEvaluationPeriods: 1 + minFailingPeriodsToAlert: 1 + query: 'InsightsMetrics + | where _ResourceId has "Microsoft.HybridCompute/machines" + | where Origin == "vm.azm.ms" + | where Namespace == "Network" and Name == "ReadBytesPerSecond" + | extend NetworkInterface=tostring(todynamic(Tags)["vm.azm.ms/networkDeviceId"]) + | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, NetworkInterface' + autoMitigate: true + 
autoResolve: true + autoResolveTime: 0:10:00 + references: + - name: 'Monitor virtual machines with Azure Monitor: Alerts' + url: https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules + deployments: + - name: Deploy Hybrid Machine Network Read (bytes/sec) Alert + template: Deploy-Hybrid-VM-NetworkIn-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false + guid: 5e223d13-112e-4f84-82ba-e03a76f6350f +- name: Hybrid Machine Network Write Alert + description: Log Alert for Hybrid Machine Network Write (bytes/sec) + type: Log + verified: false + visible: true + tags: + - alz + properties: + severity: 2 + operator: GreaterThan + timeAggregation: Average + windowSize: PT15M + evaluationFrequency: PT5M + threshold: 10000000 + metricMeasureColumn: AggregatedValue + resouceIdColumn: _ResourceId + dimensions: + - name: Computer + operator: Include + values: + - '*' + - name: Disk + operator: Include + values: + - '*' + failingPeriods: + numberOfEvaluationPeriods: 1 + minFailingPeriodsToAlert: 1 + query: 'InsightsMetrics + | where _ResourceId has "Microsoft.HybridCompute/machines" + | where Origin == "vm.azm.ms" + | where Namespace == "Network" and Name == "WriteBytesPerSecond" + | extend NetworkInterface=tostring(todynamic(Tags)["vm.azm.ms/networkDeviceId"]) + | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, NetworkInterface' + autoMitigate: true + autoResolve: true + autoResolveTime: 0:10:00 + references: + - name: 'Monitor virtual machines with Azure Monitor: Alerts' + url: https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules + deployments: + - name: Deploy Hybrid Machine Network Write (bytes/sec) Alert + template: Deploy-Hybrid-VM-NetworkOut-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false + guid:
bb1969d8-eaa2-45b6-bd9f-09348b1ee346 +- name: Hybrid Machine OS Disk Read Latency Alert + description: Log Alert for Hybrid Machine OS Disk Read Latency (ms) + type: Log + verified: false + visible: true + tags: + - alz + properties: + severity: 2 + operator: GreaterThan + timeAggregation: Average + windowSize: PT15M + evaluationFrequency: PT5M + threshold: 25 + metricMeasureColumn: AggregatedValue + resouceIdColumn: _ResourceId + dimensions: + - name: Computer + operator: Include + values: + - '*' + - name: Disk + operator: Include + values: + - '*' + failingPeriods: + numberOfEvaluationPeriods: 1 + minFailingPeriodsToAlert: 1 + query: 'InsightsMetrics + | where _ResourceId has "Microsoft.HybridCompute/machines" + | where Origin == "vm.azm.ms" + | where Namespace == "LogicalDisk" and Name == "ReadLatencyMs" + | extend Disk=tostring(todynamic(Tags)["vm.azm.ms/mountId"]) + | where Disk in ("C:","/") + | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, Disk' + autoMitigate: true + autoResolve: true + autoResolveTime: 0:10:00 + references: + - name: 'Monitor virtual machines with Azure Monitor: Alerts' + url: https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules + deployments: + - name: Deploy Hybrid Machine OS Disk Read Latency Alert + template: Deploy-Hybrid-VM-OSDiskReadLatency-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false + guid: 4b62cf77-069b-42a3-9608-8cd585a640f1 +- name: Hybrid Machine OS Disk Free Space Percentage + description: Log Alert for Hybrid Machine OS Disk Free Space Percentage + type: Log + verified: false + visible: true + tags: + - alz + properties: + severity: 2 + operator: LessThan + timeAggregation: Average + windowSize: PT15M + evaluationFrequency: PT5M + threshold: 10 + metricMeasureColumn: AggregatedValue + resouceIdColumn: _ResourceId + dimensions: + - name: Computer + operator: Include + values:
+ - '*' + - name: Disk + operator: Include + values: + - '*' + failingPeriods: + numberOfEvaluationPeriods: 1 + minFailingPeriodsToAlert: 1 + query: 'InsightsMetrics + | where _ResourceId has "Microsoft.HybridCompute/machines" + | where Origin == "vm.azm.ms" + | where Namespace == "LogicalDisk" and Name == "FreeSpacePercentage" + | extend Disk=tostring(todynamic(Tags)["vm.azm.ms/mountId"]) + | where Disk in ("C:","/") + | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, Disk' + autoMitigate: true + autoResolve: true + autoResolveTime: 0:10:00 + references: + - name: 'Monitor virtual machines with Azure Monitor: Alerts' + url: https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules + deployments: + - name: Deploy Hybrid Machine OS Disk Free Space Percentage Alert + template: Deploy-Hybrid-VM-OSDiskSpace-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false + guid: 6a96dc94-674f-4a9c-830e-a0a4f7383646 +- name: Hybrid Machine OS Disk Write Latency (ms) + description: Log Alert for Hybrid Machine OS Disk Write Latency (ms) + type: Log + verified: false + visible: true + tags: + - alz + properties: + severity: 2 + operator: GreaterThan + timeAggregation: Average + windowSize: PT15M + evaluationFrequency: PT5M + threshold: 25 + metricMeasureColumn: AggregatedValue + resouceIdColumn: _ResourceId + dimensions: + - name: Computer + operator: Include + values: + - '*' + - name: Disk + operator: Include + values: + - '*' + failingPeriods: + numberOfEvaluationPeriods: 1 + minFailingPeriodsToAlert: 1 + query: 'InsightsMetrics + | where _ResourceId has "Microsoft.HybridCompute/machines" + | where Origin == "vm.azm.ms" + | where Namespace == "LogicalDisk" and Name == "WriteLatencyMs" + | extend Disk=tostring(todynamic(Tags)["vm.azm.ms/mountId"]) + | where Disk in ("C:","/") + | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), 
Computer, _ResourceId, Disk' + autoMitigate: true + autoResolve: true + autoResolveTime: 0:10:00 + references: + - name: 'Monitor virtual machines with Azure Monitor: Alerts' + url: https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules + deployments: + - name: Deploy Hybrid Machine OS Disk Write Latency Alert + template: Deploy-Hybrid-VM-OSDiskWriteLatency-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false + guid: 50ee79d0-a321-4217-8f5b-b096654ad1ce +- name: Hybrid Machine Processor Utilization Percentage + description: Log Alert for Hybrid Machine Processor Utilization Percentage + type: Log + verified: false + visible: true + tags: + - alz + properties: + severity: 2 + operator: GreaterThan + timeAggregation: Average + windowSize: PT15M + evaluationFrequency: PT5M + threshold: 85 + metricMeasureColumn: AggregatedValue + resouceIdColumn: _ResourceId + dimensions: + - name: Computer + operator: Include + values: + - '*' + failingPeriods: + numberOfEvaluationPeriods: 1 + minFailingPeriodsToAlert: 1 + query: 'InsightsMetrics + | where _ResourceId has "Microsoft.HybridCompute/machines" + | where Origin == "vm.azm.ms" + | where Namespace == "Processor" and Name == "UtilizationPercentage" + | summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId' + autoMitigate: true + autoResolve: true + autoResolveTime: 0:10:00 + references: + - name: 'Monitor virtual machines with Azure Monitor: Alerts' + url: https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules + deployments: + - name: Deploy Hybrid Machine Processor Utilization Percentage Alert + template: Deploy-Hybrid-VM-PercentCPU-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false + guid: 639014b8-0ac6-4ae1-a887-bb3979669287 +- name: Hybrid Machine Available Memory Percentage + description: 
Log Alert for Hybrid Machine Available Memory Percentage + type: Log + verified: false + visible: true + tags: + - alz + properties: + severity: 2 + operator: LessThan + timeAggregation: Average + windowSize: PT15M + evaluationFrequency: PT5M + threshold: 10 + metricMeasureColumn: AggregatedValue + resouceIdColumn: _ResourceId + dimensions: + - name: Computer + operator: Include + values: + - '*' + failingPeriods: + numberOfEvaluationPeriods: 1 + minFailingPeriodsToAlert: 1 + query: 'InsightsMetrics + | where _ResourceId has "Microsoft.HybridCompute/machines" + | where Origin == "vm.azm.ms" + | where Namespace == "Memory" and Name == "AvailableMB" + | extend TotalMemory = toreal(todynamic(Tags)["vm.azm.ms/memorySizeMB"]) + | extend AvailableMemoryPercentage = (toreal(Val) / TotalMemory) * 100.0 + | summarize AggregatedValue = avg(AvailableMemoryPercentage) by bin(TimeGenerated,15m), Computer, _ResourceId' autoMitigate: true autoResolve: true autoResolveTime: 0:10:00 @@ -191,12 +534,12 @@ - name: 'Monitor virtual machines with Azure Monitor: Alerts' url: https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules deployments: - - name: Deploy Hybrid VM Disconnected Alert - template: Deploy-Hybrid-VM-Disconnected-Alert.json + - name: Deploy Hybrid Machine Available Memory Percentage Alert + template: Deploy-Hybrid-VM-PercentMemory-Alert.json type: Policy tags: - alz properties: scope: Subscription multiResource: false - guid: 34da0d5b-5ccb-474e-9811-3d5fdb81053c + guid: 67300599-c61e-48d2-b47d-979b2254d494 From 3c121f4f62737032798dc7a3483cdc51c1f6c30c Mon Sep 17 00:00:00 2001 From: Justin Dyer Date: Thu, 5 Dec 2024 10:47:34 -0500 Subject: [PATCH 4/4] Fix heartbeat query --- services/HybridCompute/machines/alerts.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/HybridCompute/machines/alerts.yaml b/services/HybridCompute/machines/alerts.yaml index af41f6d5a..ac6e64997 100644 --- 
a/services/HybridCompute/machines/alerts.yaml +++ b/services/HybridCompute/machines/alerts.yaml @@ -178,7 +178,7 @@ numberOfEvaluationPeriods: 1 minFailingPeriodsToAlert: 1 query: 'Heartbeat - | where type == "microsoft.hybridcompute/machines" + | where _ResourceId has "Microsoft.HybridCompute/machines" | summarize TimeGenerated=max(TimeGenerated) by Computer, _ResourceId | extend Duration = datetime_diff("minute",now(),TimeGenerated) | summarize AggregatedValue = min(Duration) by Computer, bin(TimeGenerated,5m), _ResourceId'