From e295e37be02746c9e96f0cc0e1a00c97403ced2d Mon Sep 17 00:00:00 2001 From: Joseph Barnes Date: Tue, 3 Oct 2023 05:59:31 -0500 Subject: [PATCH] updated alz alert details to be data driven --- docs/content/patterns/alz/Alerts-Details.md | 89 +------- docs/layouts/shortcodes/alertList.html | 2 +- .../alzActivityLogAdministrativeAlerts.html | 73 +++++++ .../alzActivityLogResourceHealthAlerts.html | 72 +++++++ .../alzActivityLogServiceHealthAlerts.html | 69 ++++++ docs/layouts/shortcodes/alzMetricAlerts.html | 95 ++++++++ .../shortcodes/alzVMInsightsLogAlerts.html | 95 ++++++++ .../Automation/automationAccounts/alerts.yaml | 12 +- services/Compute/virtualMachines/alerts.yaml | 202 ++++++++++++++++-- services/KeyVault/vaults/alerts.yaml | 51 ++++- .../Network/applicationGateways/alerts.yaml | 94 ++++++-- services/Network/azureFirewalls/alerts.yaml | 39 +++- .../Network/expressRouteCircuits/alerts.yaml | 76 +++++-- .../Network/expressRouteGateways/alerts.yaml | 51 +++-- .../Network/expressRoutePorts/alerts.yaml | 91 ++++++-- services/Network/loadBalancers/alerts.yaml | 78 ++++--- .../Network/networkSecurityGroups/alerts.yaml | 17 +- services/Network/privateDnsZones/alerts.yaml | 42 +++- .../Network/publicIPAddresses/alerts.yaml | 44 +++- services/Network/routeTables/alerts.yaml | 17 +- .../virtualNetworkGateways/alerts.yaml | 123 ++++++++--- services/Network/virtualNetworks/alerts.yaml | 10 +- services/Network/vpnGateways/alerts.yaml | 157 ++++++++++---- .../workspaces/alerts.yaml | 42 +++- services/RecoveryServices/alerts.yaml | 15 -- services/RecoveryServices/vaults/alerts.yaml | 27 +++ services/Resources/subscriptions/alerts.yaml | 78 +++++-- services/Storage/storageAccounts/alerts.yaml | 35 ++- .../storageSyncServices/alerts.yaml | 3 + .../StreamAnalytics/streamingjobs/alerts.yaml | 10 + services/Synapse/workspaces/alerts.yaml | 8 + services/Web/hostingEnvironments/alerts.yaml | 2 + services/Web/serverFarms/alerts.yaml | 9 + services/Web/sites/alerts.yaml | 29 +++ 
34 files changed, 1518 insertions(+), 339 deletions(-) create mode 100644 docs/layouts/shortcodes/alzActivityLogAdministrativeAlerts.html create mode 100644 docs/layouts/shortcodes/alzActivityLogResourceHealthAlerts.html create mode 100644 docs/layouts/shortcodes/alzActivityLogServiceHealthAlerts.html create mode 100644 docs/layouts/shortcodes/alzMetricAlerts.html create mode 100644 docs/layouts/shortcodes/alzVMInsightsLogAlerts.html delete mode 100644 services/RecoveryServices/alerts.yaml diff --git a/docs/content/patterns/alz/Alerts-Details.md b/docs/content/patterns/alz/Alerts-Details.md index cb1a584a1..c343dc9e6 100644 --- a/docs/content/patterns/alz/Alerts-Details.md +++ b/docs/content/patterns/alz/Alerts-Details.md @@ -24,52 +24,7 @@ Only a small number of the resources support metric alert rules scoped at the su > **NOTE**: There are hidden columns within the table, to scroll across you need to go to the bottom of the table to scroll and this is a limitation within tables in GitHub. If you have any suggestions to improving this expeirence please do get in touch via a PR or raise an issue, thank you. 
-| AlertName | Component | Metric | Aggregation | Operator | Threshold | WindowSize | Frequency | Severity | Scope | Support for Multiple Resources | Verified | References | -|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------|---------------------------------------------------|-------------|--------------------|------------|------------|-----------|----------|----------|--------------------------------|----------|| -| [Deploy ExpressRoute Circuits Bgp Availability Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-ercir_bgpavailability_alert.bicep)1 | microsoft.network/expressroutecircuits | BgpAvailability | Average | LessThan | 90 | PT5M | PT1M | 0 | Resource | No | Y | [Monitor ExpressRoute Alerts](https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#alerts)
[ExpressRoute KQL Queries](https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#sample-kusto-queries) | -| [Deploy ExpressRoute Circuits Arp Availability Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-ercir_arpavailability_alert.bicep)1 | microsoft.network/expressroutecircuits | ArpAvailability | Average | LessThan | 90 | PT5M | PT1M | 0 | Resource | No | Y | [Monitor ExpressRoute Alerts](https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#alerts)
[ExpressRoute KQL Queries](https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#sample-kusto-queries) | -| [Deploy ExpressRoute Circuits QosDropBitsInPerSecond Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-ercir_qosdropsbitsin_alert.bicep) | microsoft.network/expressroutecircuits | QosDropBitsInPerSecond | Average | GreaterThan | 100 | PT5M | PT1M | 2 | Resource | No | N | [Monitor ExpressRoute Alerts](https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#alerts)
[ExpressRoute KQL Queries](https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#sample-kusto-queries) | -| [Deploy ExpressRoute Circuits QosDropBitsOutPerSecond Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-ercir_qosdropsbitsout_alert.bicep) | microsoft.network/expressroutecircuits | QosDropBitsOutPerSecond | Average | GreaterThan | 100 | PT5M | PT1M | 2 | Resource | No | N | [Monitor ExpressRoute Alerts](https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#alerts)
[ExpressRoute KQL Queries](https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#sample-kusto-queries) | -| [Deploy KeyVault Availability Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-kv_availability_alert.bicep)1 | Microsoft.KeyVault/vaults | Availability | Average | LessThan | 90 | PT5M | PT1M | 1 | Resource | Yes | Y | [Monitoring KeyVault Reference](https://docs.microsoft.com/en-us/azure/key-vault/general/monitor-key-vault-reference)
[Monitoring KeyVault](https://docs.microsoft.com/en-us/azure/key-vault/general/monitor-key-vault)
[KeyVault Insights Overview](https://docs.microsoft.com/en-us/azure/azure-monitor/insights/key-vault-insights-overview) | -| [Deploy KeyVault Capacity Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-kv_capacity_alert.bicep) | Microsoft.KeyVault/vaults | SaturationShoebox | Average | GreaterThan | 75 | PT5M | PT1M | 1 | Resource | Yes | Y | [Monitoring KeyVault Reference](https://docs.microsoft.com/en-us/azure/key-vault/general/monitor-key-vault-reference)
[Monitoring KeyVault](https://docs.microsoft.com/en-us/azure/key-vault/general/monitor-key-vault)
[KeyVault Insights Overview](https://docs.microsoft.com/en-us/azure/azure-monitor/insights/key-vault-insights-overview) | -| [Deploy KeyVault Latency Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-kv_latency_alert.bicep) | Microsoft.KeyVault/vaults | ServiceApiLatency | Average | GreaterThan | 1000 | PT5M | PT1M | 3 | Resource | Yes | Y | [Monitoring KeyVault Reference](https://docs.microsoft.com/en-us/azure/key-vault/general/monitor-key-vault-reference)
[Monitoring KeyVault](https://docs.microsoft.com/en-us/azure/key-vault/general/monitor-key-vault)
[KeyVault Insights Overview](https://docs.microsoft.com/en-us/azure/azure-monitor/insights/key-vault-insights-overview) | -| [Deploy KeyVault Requests Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-kv_requests_alert.bicep) | Microsoft.KeyVault/vaults | ServiceApiResult | Average | GreaterThan | dynamic | PT5M | PT1M | 2 | Resource | Yes | Y | [Monitoring KeyVault Reference](https://docs.microsoft.com/en-us/azure/key-vault/general/monitor-key-vault-reference)
[Monitoring KeyVault](https://docs.microsoft.com/en-us/azure/key-vault/general/monitor-key-vault)
[KeyVault Insights Overview](https://docs.microsoft.com/en-us/azure/azure-monitor/insights/key-vault-insights-overview) | -| [Deploy Automation Account TotalJob Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-aa_totaljob_alert.bicep) | Microsoft.Automation/automationAccounts | TotalJob | Count | GreaterThan | 0 | PT5M | PT1M | 2 | Resource | No | N | [Azure Automation Azure Monitor Metrics](https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftautomationautomationaccounts) | -| [Deploy AFW FirewallHealth Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-afw_firewallhealth_alert.bicep) | Microsoft.Network/azureFirewalls | FirewallHealth | Average | LessThan | 90 | PT5M | PT1M | 0 | Resource | No | N | [Overview of Azure Firewall logs and metrics](https://docs.microsoft.com/en-us/azure/firewall/logs-and-metrics#metrics) | -| [Deploy AFW SNATPortUtilization Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-afw_snatportutilization_alert.bicep) | Microsoft.Network/azureFirewalls | SNATPortUtilization | Average | GreaterThan | 80 | PT5M | PT1M | 1 | Resource | No | N | [Overview of Azure Firewall logs and metrics](https://docs.microsoft.com/en-us/azure/firewall/logs-and-metrics#metrics) | -| [Deploy ALB Data Path Availability Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-alb_datapathavailability_alert.bicep) | Microsoft.Network/loadBalancers | VipAvailability | Average | LessThan | | PT5M | PT1M | 0 | Resource | No | Y | Public Contribution
[Azure Monitor supported metrics by resource type - Azure Monitor](https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkloadbalancers)
[Multi-Demensional-Metrics](https://learn.microsoft.com/en-us/azure/load-balancer/load-balancer-standard-diagnostics#multi-dimensional-metrics)
[Is The Data Path Up and Available for My Load-Balancer](https://learn.microsoft.com/en-us/azure/load-balancer/load-balancer-standard-diagnostics#is-the-data-path-up-and-available-for-my-load-balancer-frontend) | -| [Deploy ALB Health Probe Status Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-alb_healthprobestatus_alert.bicep) | Microsoft.Network/loadBalancers | DipAvailability | Average | LessThan | 90 | PT5M | PT1M | 0 | Resource | No | Y | Public Contribution
[Azure Monitor supported metrics by resource type - Azure Monitor](https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkloadbalancers)
[Are Backend Instances for my Load-Balancer Responding to Probes](https://learn.microsoft.com/en-us/azure/load-balancer/load-balancer-standard-diagnostics#are-the-backend-instances-for-my-load-balancer-responding-to-probes) | -| [Deploy ALB Used SNAT Ports Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-alb_usedsnatports_alert.bicep) | Microsoft.Network/loadBalancers | UsedSnatPorts | Average | GreaterThan | 900 | PT5M | PT1M | 0 | Resource | No | Y | Public Contribution
[Azure Monitor supported metrics by resource type - Azure Monitor](https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkloadbalancers)
[Load-BalancerAlerts](https://learn.microsoft.com/en-us/azure/load-balancer/monitor-load-balancer#alerts)
[Check My SNAT Port Usage and Allocation](https://learn.microsoft.com/en-us/azure/load-balancer/load-balancer-standard-diagnostics#how-do-i-check-my-snat-port-usage-and-allocation) | -| [Deploy SA Availability Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-sa_availability_alert.bicep)1 | Microsoft.Storage/storageAccounts | Availability | Average | LessThan | 90 | PT5M | PT5M | 1 | Resource | No | Y | [Monitoring Availability](https://docs.microsoft.com/en-us/azure/storage/common/storage-monitoring-diagnosing-troubleshooting?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&tabs=dotnet#monitoring-availability
) | -| [Deploy VPNG BGP Peer Status Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-vpng_bgppeerstatus_alert.bicep) | microsoft.network/vpngateways | BgpPeerStatus | Total | LessThan | 1 | PT5M | PT5M | | Resource | No | N | [Overview of Azure Firewall logs and metrics](https://docs.microsoft.com/en-us/azure/firewall/logs-and-metrics#metrics) | -| [Deploy VPNG Ingress Packet Drop Mismatch Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-vpng_ingresspacketdropmismatch_alert.bicep) | microsoft.network/vpngateways | TunnelIngressPacketDropTSMismatch | Average | GreaterThan | dynamic | PT5M | PT5M | 3 | Resource | No | N | [Overview of Azure Firewall logs and metrics](https://docs.microsoft.com/en-us/azure/firewall/logs-and-metrics#metrics) | -| [Deploy VPNG Egress Packet Drop Count Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-vpng_egresspacketdropcount_alert.bicep) | microsoft.network/vpngateways | TunnelEgressPacketDropCount | Total | GreaterThan | dynamic | PT5M | PT5M | 3 | Resource | No | N | [Overview of Azure Firewall logs and metrics](https://docs.microsoft.com/en-us/azure/firewall/logs-and-metrics#metrics) | -| [Deploy VPNG Ingress Packet Drop Count Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-vpng_ingresspacketdropcount_alert.bicep) | microsoft.network/vpngateways | TunnelIngressPacketDropCount | Total | GreaterThan | dynamic | PT5M | PT5M | 3 | Resource | No | N | [Overview of Azure Firewall logs and metrics](https://docs.microsoft.com/en-us/azure/firewall/logs-and-metrics#metrics) | -| [Deploy VPNG Egress Packet Drop Mismatch 
Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-vpng_egresspacketdropmismatch_alert.bicep) | microsoft.network/vpngateways | TunnelEgressPacketDropTSMismatch | Total | GreaterThan | dynamic | PT5M | PT5M | 3 | Resource | No | N | [Overview of Azure Firewall logs and metrics](https://docs.microsoft.com/en-us/azure/firewall/logs-and-metrics#metrics) | -| [Deploy VNetG ExpressRoute CPU Utilization Alert'](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-vnetg_expressroutecpuutilization_alert.bicep) | microsoft.network/virtualNetworkGateways | ExpressRouteGatewayCpuUtilization | Average | GreaterThan | 90 | PT5M | PT1M | 1 | Resource | No | Y | [Azure Monitor supported metrics by resource type - Azure Monitor](https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkvirtualnetworkgateways) | -| [Deploy VNetG ExpressRoute CPU Utilization Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-vnetg_expressroutecpuutilization_alert.bicep) | microsoft.network/expressroutegateways | ExpressRouteGatewayCpuUtilization | Average | GreaterThan | 80 | PT5M | PT1M | 1 | Resource | No | Y | [ExpressRoute Monitoring Metrics Alerts for ExpressRoute Gateways](https://docs.microsoft.com/en-us/azure/expressroute/expressroute-monitoring-metrics-alerts#expressroute-gateways) | -| [Deploy PDNSZ Capacity Utilization Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-pdnsz_capacityutilization_alert.bicep) | Microsoft.Network/privateDnsZones | VirtualNetworkLinkCapacityUtilization | Maximum | GreaterThanEqualTo | 80 | PT1H | PT1H | 2 | Resource | No | N | [Private DNS Alert Metrics](https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkprivatednszones) | 
-| [Deploy PDNSZ Query Volume Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-pdnsz_queryvolume_alert.bicep) | Microsoft.Network/privateDnsZones | QueryVolume | Total | GreaterThanOrEqual | 500 | PT1H | PT1H | 4 | Resource | No | N | [Private DNS Alert Metrics](https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkprivatednszones) | -| [Deploy PDNSZ Record Set Capacity Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-pdnsz_recordsetcapacity_alert.bicep) | Microsoft.Network/privateDnsZones | RecordSetCapacityUtilization | Maximum | GreaterThanOrEqual | 75 | PT1H | PT1H | 2 | Resource | No | N | [Private DNS Alert Metrics](https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkprivatednszones) | -| [Deploy PDNSZ Registration Capacity Utilization Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-pdnsz_registrationcapacityutilization_alert.bicep) | Microsoft.Network/privateDnsZones | VirtualNetworkWithRegistrationCapacityUtilization | Maximum | GreaterThan | 90 | PT1H | PT1H | 2 | Resource | No | N | [Private DNS Alert Metrics](https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkprivatednszones) | -| [Deploy PIP Bytes in DDoS Attack Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-pip_bytesinddosattack_alert.bicep) | Microsoft.Network/publicIPAddresses | bytesinddos | Maximum | GreaterThan | 8000000 | PT5M | PT5M | 4 | Resource | No | N | [Monitor Public IP Addresses](https://learn.microsoft.com/en-us/azure/virtual-network/ip-services/monitor-public-ip#alerts
[[Public IP Addresses Supported Metrics](https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkpublicipaddresses) | -| [Deploy PIP DDoS Attack Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-pip_ddosattack_alert.bicep) | Microsoft.Network/publicIPAddresses | ifunderddosattack | Maximum | GreaterThan | 1 | PT5M | PT5M | 1 | Resource | No | Y | [Monitor Public IP Addresses](https://learn.microsoft.com/en-us/azure/virtual-network/ip-services/monitor-public-ip#alerts)
[Public IP Addresses Supported Metrics](https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkpublicipaddresses) | -| [Deploy PIP Packets in DDoS Attack Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-pip_packetsinddos_alert.bicep) | Microsoft.Network/publicIPAddresses | PacketsInDDoS | Total | GreaterThanEqualTo | 40000 | PT5M | PT5M | 4 | Resource | No | N | [Monitor Public IP Addresses](https://learn.microsoft.com/en-us/azure/virtual-network/ip-services/monitor-public-ip#alerts)
[Public IP Addresses Supported Metrics](https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkpublicipaddresses) | -| [Deploy PIP VIP Availability Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-pip_vipavailability_alert.bicep) | Microsoft.Network/publicIPAddresses | VipAvailability | Average | LessThan | 1 | PT5M | PT5M | 1 | Resource | No | N | [Azure Monitor supported metrics by resource type - Azure Monitor](https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkpublicipaddresses) | -| [Deploy VNet DDoS Attack Alert](https://github.com/Azure/alz-monitor/blob/arm-conversion-2023-01-26/src/resources/Microsoft.Authorization/policyDefinitions/deploy-vnet_ddosattack_alert.bicep) | Microsoft.Network/virtualNetworks | ifunderddosattack | Maximum | GreaterThanOrEqual | 1 | PT5M | PT5M | 1 | Resource | No | N | [Azure Monitor supported metrics by resource type - Azure Monitor](https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkvirtualnetworks) | -| [Deploy VNetG Tunnel Bandwidth Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-vnetg_bandwidthutilization_alert.bicep) | Microsoft.Network/virtualNetworkGateways | TunnelAverageBandwidth | Average | LessThan | 1 | PT5M | PT5M | 0 | Resource | No | N | [Azure Monitor supported metrics by resource type - Azure Monitor](https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkvirtualnetworkgateways) | -| [Deploy VNetG Tunnel Egress Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-vnetg_egress_alert.bicep) | Microsoft.Network/virtualNetworkGateways | TunnelEgressBytes | Average | LessThanOrEqual | 1 | PT5M | PT5M | 0 | Resource | No | N | [Azure Monitor 
supported metrics by resource type - Azure Monitor](https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkvirtualnetworkgateways) | -| [Deploy VNetG Tunnel Ingress Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-vnetg_ingress_alert.bicep) | Microsoft.Network/virtualNetworkGateways | TunnelIngressBytes | Average | LessThanOrEqual | 1 | PT5M | PT5M | 0 | Resource | No | N | [Azure Monitor supported metrics by resource type - Azure Monitor](https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkvirtualnetworkgateways) | -| [Deploy VPNG Bandwidth Utilization Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-vpng_bandwidthutilization_alert.bicep) | microsoft.network/vpngateways | tunnelaveragebandwidth | Average | GreaterThan | 1000000000 | PT5M | PT5M | 0 | Resource | No | N | [Monitor VPN Gateway](https://learn.microsoft.com/en-us/azure/vpn-gateway/monitor-vpn-gateway)
[Monitor VPN Gateway Reference](https://learn.microsoft.com/en-us/azure/vpn-gateway/monitor-vpn-gateway-reference)[
Azure Monitor supported metrics by resource type - Azure Monitor](https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkvpngateways) | -| [Deploy VPNG Egress Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-vpng_egress_alert.bicep) | microsoft.network/vpngateways | tunnelegressbytes | Total | LessThanOrEqual | 0 | PT5M | PT5M | 0 | Resource | No | N | [Monitor VPN Gateway](https://learn.microsoft.com/en-us/azure/vpn-gateway/monitor-vpn-gateway)
[Monitor VPN Gateway Reference](https://learn.microsoft.com/en-us/azure/vpn-gateway/monitor-vpn-gateway-reference)[
Azure Monitor supported metrics by resource type - Azure Monitor](https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkvpngateways) | -| [Deploy VPNG Ingress Alert](https://github.com/Azure/alz-monitor/blob/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-vpng_ingress_alert.bicep) | microsoft.network/vpngateways | tunnelingressbytes | Total | LessThanOrEqual | 0 | PT5M | PT5M | 0 | Resource | No | N | [Monitor VPN Gateway](https://learn.microsoft.com/en-us/azure/vpn-gateway/monitor-vpn-gateway)
[Monitor VPN Gateway Reference](https://learn.microsoft.com/en-us/azure/vpn-gateway/monitor-vpn-gateway-reference)[
Azure Monitor supported metrics by resource type - Azure Monitor](https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkvpngateways) | -| [Deploy VPNG Ingress Packet Drop Mismatch Alert](https://github.com/Azure/alz-monitor/blob/fdd0ab013ee6f35a53e130384734e20c0a722b54/Observability_L100/Deploy/policyDefinitions/deploy-vpng_ingresspacketdropmismatch_alert.bicep) | Microsoft.Network/virtualNetworkGateways | TunnelIngressPacketDropTSMismatch | Average | GreaterThan | 100 | PT5M | PT5M | 3 | Resource | No | N | [Azure Monitor supported metrics by resource type - Azure Monitor](https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkvirtualnetworkgateways) | -| [Deploy VNetG Egress Packet Drop Mismatch Alert](https://github.com/Azure/alz-monitor/blob/arm-conversion-2023-01-26/src/resources/Microsoft.Authorization/policyDefinitions/deploy-vnetg_egresspacketdropmismatch_alert.bicep) | Microsoft.Network/virtualNetworkGateways | TunnelEgressPacketDropCount | Average | GreaterThan | 100 | PT5M | PT5M | 3 | Resource | No | N | [Azure Monitor supported metrics by resource type - Azure Monitor](https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkvirtualnetworkgateways) | -| [Deploy VNetG Ingress Packet Drop Count Alert](https://github.com/Azure/alz-monitor/blob/arm-conversion-2023-01-26/src/resources/Microsoft.Authorization/policyDefinitions/deploy-vnetg_ingresspacketdropcount_alert.bicep) | Microsoft.Network/virtualNetworkGateways | TunnelIngressPacketDropCount | Average | GreaterThan | 100 | PT5M | PT5M | 3 | Resource | No | N | [Azure Monitor supported metrics by resource type - Azure Monitor](https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkvirtualnetworkgateways) | -| [Deploy VNetG Egress Packet Drop Mismatch 
Alert](https://github.com/Azure/alz-monitor/blob/arm-conversion-2023-01-26/src/resources/Microsoft.Authorization/policyDefinitions/deploy-vnetg_egresspacketdropmismatch_alert.bicep) | Microsoft.Network/virtualNetworkGateways | TunnelEgressPacketDropTSMismatch | Average | GreaterThan | 100 | PT5M | PT5M | 3 | Resource | No | N | [Azure Monitor supported metrics by resource type - Azure Monitor](https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkvirtualnetworkgateways) | -| [Deploy VNetG ExpressRoute Bits Per Second Alert](https://github.com/Azure/alz-monitor/blob/arm-conversion-2023-01-26/src/resources/Microsoft.Authorization/policyDefinitions/deploy-vnetg_expressroutebitspersecond_alert.bicep) | Microsoft.Network/virtualNetworkGateways | ExpressRouteGatewayBitsPerSecond | Average | LessThanOrEqual | 1 | PT5M | PT5M | 0 | Resource | No | N | [Azure Monitor supported metrics by resource type - Azure Monitor](https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkvirtualnetworkgateways) | -| [Deploy ERG ExpressRoute Bits In Alert](https://github.com/Azure/alz-monitor/blob/arm-conversion-2023-01-26/src/resources/Microsoft.Authorization/policyDefinitions/deploy-erg_bitsinpersecond_alert.bicep) | microsoft.network/expressroutegateways | ERGatewayConnectionBitsInPerSecond | Average | LessThanOrEqual | 1 | PT5M | PT5M | 0 | Resource | No | N | [ExpressRoute Monitoring Metrics Alerts - ExpressRoute-Gateways](https://docs.microsoft.com/en-us/azure/expressroute/expressroute-monitoring-metrics-alerts#expressroute-gateways) | -| [Deploy ERG ExpressRoute Bits Out Alert](https://github.com/Azure/alz-monitor/blob/arm-conversion-2023-01-26/src/resources/Microsoft.Authorization/policyDefinitions/deploy-erg_bitsoutpersecond_alert.bicep) | microsoft.network/expressroutegateways | ERGatewayConnectionBitsOutPerSecond | Average | LessThanOrEqual | 1 | PT5M | PT5M | 0 | Resource | No | N | 
[ExpressRoute Monitoring Metrics Alerts - ExpressRoute-Gateways](https://docs.microsoft.com/en-us/azure/expressroute/expressroute-monitoring-metrics-alerts#expressroute-gateways) | +{{< alzMetricAlerts >}} 1 See "Why are the availability alert thresholds lower than 100% in this solution when the product group documention recommends 100%?" in the [FAQ](FAQ.md) for more details. @@ -79,21 +34,11 @@ Only a small number of the resources support metric alert rules scoped at the su Use the following two sections to quickly know when there's a Service Health issue with an Azure resource, saving you the effort of further troubleshooting and allow you to focus on communicating to your user base and/or use these alerts as part of your business continuity actions (remediations). -| Alert Policy Name | Alert Name | targetScope | Category | Property.cause | Properties.currentHealthStatus | Scope | Verified |
References
| -|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------|-----------------|----------------|-------------------------------------|--------------------------------|--------------|----------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [Deploy Resource Health Unhealthy Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-activitylog-ResourceHealth-UnHealthly-alert.json) | ResourceHealthUnhealthyAlert | managementGroup | ResourceHealth | PlatformInitiated,
UserInitiated | Degraded,
Unavailable | Subscription | Yes | [Resource Health](https://learn.microsoft.com/en-us/azure/service-health/resource-health-overview)
[Best practices for setting up service health alerts](https://www.microsoft.com/en-us/videoplayer/embed/RE2OtUa) | - - +{{< alzActivityLogResourceHealthAlerts >}} ### Service Health Alerts -| Alert Policy Name | Alert Name | PolicyScope | Category | properties.incidentType | Scope | Documented |
References
| -|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------|-----------------|---------------|-------------------------|--------------|------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [Deploy Service Health Advisory Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-activitylog-ServiceHealth-Health.json) | ServiceHealthAdvisoryEvent | managementGroup | ServiceHealth | ActionRequired | Subscription | Yes | [Activity Log Service Notifications](https://learn.microsoft.com/en-us/azure/service-health/alerts-activity-log-service-notifications-portal)
[Best practices for setting up service health alerts](https://www.microsoft.com/en-us/videoplayer/embed/RE2OtUa) | -| [Deploy Service Health Incident Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-activitylog-ServiceHealth-Incident.json) | ServiceHealthIncident | managementGroup | ServiceHealth | Incident | Subscription | Yes | [Activity Log Service Notifications](https://learn.microsoft.com/en-us/azure/service-health/alerts-activity-log-service-notifications-portal)
[Best practices for setting up service health alerts](https://www.microsoft.com/en-us/videoplayer/embed/RE2OtUa) | -| [Deploy Service Health Maintenance Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-activitylog-ServiceHealth-Maintenance.json) | ServiceHealthPlannedMaintenance | managementGroup | ServiceHealth | Maintenance | Subscription | Yes | [Activity Log Service Notifications](https://learn.microsoft.com/en-us/azure/service-health/alerts-activity-log-service-notifications-portal)
[Best practices for setting up service health alerts](https://www.microsoft.com/en-us/videoplayer/embed/RE2OtUa) | -| [Deploy Service Health Security Advisory Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-activitylog-ServiceHealth-Security.json) | ServiceHealthSecurityIncident | managementGroup | ServiceHealth | Security | Subscription | Yes | [Activity Log Service Notifications](https://learn.microsoft.com/en-us/azure/service-health/alerts-activity-log-service-notifications-portal)
[Best practices for setting up service health alerts](https://www.microsoft.com/en-us/videoplayer/embed/RE2OtUa) | - +{{< alzActivityLogServiceHealthAlerts >}} ### Activity Log Administrative @@ -101,15 +46,7 @@ The following table lists a number of operational Activity Log alerts to alert y There isn't any per resource type guidance so what's been provided is some general guidance on alerting on the deletion of specific resources, the list may grow in the future and of course you can create your own following the pattern used for these Activity Log alerts. -| Alert Policy Name | Alert Name | PolicyScope | category | operationName | status | Scope | Documented |
References
| -|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------|-----------------|----------------|---------------------------------------------------------------------|-----------|--------------|------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [Deploy Activity Log Azure FireWall Delete Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-activitylog-AzureFirewall-Del.json) | ActivityAzureFirewallDelete | managementGroup | Administrative | Microsoft.Microsoft.Network/azurefirewalls/delete | succeeded | Subscription | No | [Activity Log Service Notifications](https://learn.microsoft.com/en-us/azure/service-health/alerts-activity-log-service-notifications-portal)
[Best practices for setting up service health alerts](https://www.microsoft.com/en-us/videoplayer/embed/RE2OtUa) | -| [Deploy Activity Log Key Vault Delete Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-activitylog-KeyVault-Del.json) | ActivityKeyVaultDelete | managementGroup | Administrative | Microsoft.KeyVault/vaults/delete | succeeded | Subscription | No | [Activity Log Service Notifications](https://learn.microsoft.com/en-us/azure/service-health/alerts-activity-log-service-notifications-portal)
[Best practices for setting up service health alerts](https://www.microsoft.com/en-us/videoplayer/embed/RE2OtUa) | -| [Deploy Activity Log LA Workspace Delete Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-activitylog-LAWorkspace-Del.json) | ActivityLAWorkspaceDelete | managementGroup | Administrative | Microsoft.OperationalInsights/workspaces/delete | succeeded | Subscription | No | [Activity Log Service Notifications](https://learn.microsoft.com/en-us/azure/service-health/alerts-activity-log-service-notifications-portal)
[Best practices for setting up service health alerts](https://www.microsoft.com/en-us/videoplayer/embed/RE2OtUa) | -| [Deploy Activity Log LA Workspace Regenerate Key Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-activitylog-LAWorkspace-ReGen.json) | ActivityLAWorkspaceRegenKey | managementGroup | Administrative | Microsoft.OperationalInsights/workspaces/regeneratesharedkey/action | succeeded | Subscription | No | [Activity Log Service Notifications](https://learn.microsoft.com/en-us/azure/service-health/alerts-activity-log-service-notifications-portal)
[Best practices for setting up service health alerts](https://www.microsoft.com/en-us/videoplayer/embed/RE2OtUa) | -| [Deploy Activity Log NSG Delete Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-activitylog-NSG-Del.json) | ActivityNSGDelete | managementGroup | Administrative | Microsoft.Network/networkSecurityGroups/delete | succeeded | Subscription | No | [Activity Log Service Notifications](https://learn.microsoft.com/en-us/azure/service-health/alerts-activity-log-service-notifications-portal)
[Best practices for setting up service health alerts](https://www.microsoft.com/en-us/videoplayer/embed/RE2OtUa) | -| [Deploy Activity Log Route Table Update Alert](https://github.com/Azure/alz-monitor/tree/main/src/resources/Microsoft.Authorization/policyDefinitions/deploy-activitylog-RouteTable-Update.json) | ActivityUDRUpdate | managementGroup | Administrative | Microsoft.Network/routeTables/routes/write | succeeded | Subscription | No | [Activity Log Service Notifications](https://learn.microsoft.com/en-us/azure/service-health/alerts-activity-log-service-notifications-portal)
[Best practices for setting up service health alerts](https://www.microsoft.com/en-us/videoplayer/embed/RE2OtUa) | -| [Deploy Activity Log VPN Gateway Delete Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-activitylog-VPNGate-Del.json) | ActivityVPNGatewayDelete | managementGroup | Administrative | Microsoft.Network/vpnGateways/delete | succeeded | Subscription | No | [Activity Log Service Notifications](https://learn.microsoft.com/en-us/azure/service-health/alerts-activity-log-service-notifications-portal)
[Best practices for setting up service health alerts](https://www.microsoft.com/en-us/videoplayer/embed/RE2OtUa) | +{{< alzActivityLogAdministrativeAlerts >}} ## VM Insights Log Alerts @@ -117,19 +54,7 @@ Once VM Insights has been enabled in your environment, the following alert rules N/A: Not applicable, not used in the query or used as a parameter. -| AlertName | Component | Aggregation | Operator | Threshold | WindowSize | Frequency | ResolveTime | EvaluationPeriods | FailingPeriods | ComputersToInclude | Other Resources | Severity | Query | Verified | References | -|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------|-------------|-------------|-----------|------------|-----------|-------------|-------------------|----------------|--------------------|----------------------------------|----------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [Deploy VM Available Memory Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-vm-PercentMemory_alert.json) | Microsoft.Compute/virtualMachines | Average | LessThan | 1000 | PT15M | PT5M | 0:10:00 | 1 | 1 | N/A | N/A | 2 | ```InsightsMetrics\| where Origin == "vm.azm.ms"\| where Namespace == "Memory" and Name == "AvailableMB"\| extend TotalMemory = toreal(todynamic(Tags)["vm.azm.ms/memorySizeMB"]) \| extend 
AvailableMemoryPercentage = (toreal(Val) / TotalMemory) * 100.0\| summarize AggregatedValue = avg(AvailableMemoryPercentage) by bin(TimeGenerated, 15m), Computer, _ResourceId``` | Y | [Monitor virtual machines with Azure Monitor: Alerts](https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules) | -| [Deploy VM CPU Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-vm-PercentCPU_alert.json) | Microsoft.Compute/virtualMachines | Average | GreaterThan | 85 | PT15M | PT5M | 0:10:00 | N/A | 1 | N/A | N/A | 2 | ```InsightsMetrics\| where Origin == "vm.azm.ms"\| where Namespace == "Processor" and Name == "UtilizationPercentage"\| summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId``` | Y | [Monitor virtual machines with Azure Monitor: Alerts](https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules) | -| [Deploy VM Data Disk Write Latency Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-vm-dataDiskwriteLatency_alert.json) | Microsoft.Compute/virtualMachines | Average | GreaterThan | 50 | PT15M | PT5M | 0:10:00 | 1 | 1 | * | parDisksToInclude
* | 2 | ```InsightsMetrics\| where Origin == "vm.azm.ms"\| where Namespace == "LogicalDisk" and Name == "ReadLatencyMs"\| extend Disk=tostring(todynamic(Tags)["vm.azm.ms/mountId"])\|where Disk !in (\'C:\',\'/\')\| summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, Disk``` | N | [Monitor virtual machines with Azure Monitor: Alerts](https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules) | -| [Deploy VM Data Disk Read Latency Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-vm-dataDiskreadLatency_alert.json) | Microsoft.Compute/virtualMachines | Average | GreaterThan | 50 | PT15M | PT5M | 0:10:00 | 1 | 1 | * | parDisksToInclude
* | 2 | ```InsightsMetrics\| where Origin == "vm.azm.ms"\| where Namespace == "LogicalDisk" and Name == "ReadLatencyMs"\| extend Disk=tostring(todynamic(Tags)["vm.azm.ms/mountId"])\|where Disk !in (\'C:\',\'/\')\| summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, Disk``` | N | [Monitor virtual machines with Azure Monitor: Alerts](https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules) | -| [Deploy VM OS Disk Write Latency Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-vm-OSDiskwriteLatency_alert.json) | Microsoft.Compute/virtualMachines | Average | GreaterThan | 50 | PT15M | PT5M | 0:10:00 | 1 | 1 | * | parDisksToInclude
C:
/ | 2 | ```InsightsMetrics\| where Origin == "vm.azm.ms"\| where Namespace == "LogicalDisk" and Name == "WriteLatencyMs"\| extend Disk=tostring(todynamic(Tags)["vm.azm.ms/mountId"])\| summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, Disk``` | N | [Monitor virtual machines with Azure Monitor: Alerts](https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules) | -| [Deploy VM OS Disk Read Latency Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-vm-OSDiskreadLatency_alert.json) | Microsoft.Compute/virtualMachines | Average | GreaterThan | 30 | PT15M | PT5M | 0:10:00 | 1 | 1 | * | parDisksToInclude
C:
/ | 2 | ```InsightsMetrics\| where Origin == "vm.azm.ms"\| where Namespace == "LogicalDisk" and Name == "ReadLatencyMs"\| extend Disk=tostring(todynamic(Tags)["vm.azm.ms/mountId"])\| summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, Disk``` | N | [Monitor virtual machines with Azure Monitor: Alerts](https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules) | -| [Deploy VM Network Write Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-vm-NetworkOut_alert.json) | Microsoft.Compute/virtualMachines | Average | GreaterThan | 10000000 | PT15M | PT5M | 0:10:00 | 1 | 1 | * | NetworkInterfacetToInclude
* | 2 | ```InsightsMetrics\| where Origin == "vm.azm.ms"\| where Namespace == "Network" and Name == "WriteBytesPerSecond"\| extend NetworkInterface=tostring(todynamic(Tags)["vm.azm.ms/networkDeviceId"])\|summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, NetworkInterface``` | Y | [Monitor virtual machines with Azure Monitor: Alerts](https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules) | -| [Deploy VM Network Read Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-vm-NetworkIn_alert.json) | Microsoft.Compute/virtualMachines | Average | GreaterThan | 10000000 | PT15M | PT5M | 0:10:00 | 1 | 1 | * | NetworkInterfacetToInclude
* | 2 | ```InsightsMetrics\| where Origin == "vm.azm.ms"\| where Namespace == "Network" and Name == "ReadBytesPerSecond"\| extend NetworkInterface=tostring(todynamic(Tags)["vm.azm.ms/networkDeviceId"])\|summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, NetworkInterface``` | Y | [Monitor virtual machines with Azure Monitor: Alerts](https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules) | -| [Deploy VM OS Disk Space Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-vm-OSDiskSpace_alert.json) | Microsoft.Compute/virtualMachines | Average | LessThan | 10 | PT15M | PT5M | 0:10:00 | 1 | 1 | * | parDisksToInclude
C:
/ | 2 | ```InsightsMetrics\| where Origin == "vm.azm.ms"\| where Namespace == "LogicalDisk" and Name == "FreeSpacePercentage"\| extend Disk=tostring(todynamic(Tags)["vm.azm.ms/mountId"])\| summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, Disk``` | Y | [Monitor virtual machines with Azure Monitor: Alerts](https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules) | -| [Deploy VM Data Disk Space Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-vm-dataDiskSpace_alert.json) | Microsoft.Compute/virtualMachines | Average | LessThan | 10 | PT15M | PT5M | 0:10:00 | 1 | 1 | * | parDisksToInclude
* | 2 | ```InsightsMetrics\| where Origin == "vm.azm.ms"\| where Namespace == "LogicalDisk" and Name == "FreeSpacePercentage"\| extend Disk=tostring(todynamic(Tags)["vm.azm.ms/mountId"])\|where Disk !in (\'C:\',\'/\')\| summarize AggregatedValue = avg(Val) by bin(TimeGenerated, 15m), Computer, _ResourceId, Disk``` | Y | [Monitor virtual machines with Azure Monitor: Alerts](https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules) | -| [Deploy VM HeartBeat Alert](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-vm-HeartBeat_alert.json) | Microsoft.Compute/virtualMachines | Average | GreaterThan | 10 | PT15M | PT5M | 0:10:00 | 1 | 1 | N/A | N/A | 1 | ```Heartbeat\| summarize TimeGenerated=max(TimeGenerated) by Computer, _ResourceId\| extend Duration = datetime_diff('minute',now(),TimeGenerated)\| summarize AggregatedValue = min(Duration) by Computer, bin(TimeGenerated,5m), _ResourceId``` | Y | [Monitor virtual machines with Azure Monitor: Alerts](https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules) | +{{< alzVMInsightsLogAlerts >}} ## Recovery Vault Alerts @@ -139,6 +64,4 @@ Security Alerts and Job Failure alerts are summarized in the "[Using Backup Cent | PolicyName | Component | Category | Scope | Support for Multiple Resources | Verified | References | 
|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------|-------------------------------------------------------------------------------------------------------|----------|--------------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [Deploy RV Backup Health Monitoring Alerts](../blob/main/src/resources/Microsoft.Authorization/policyDefinitions/amba/deploy-rv_backuphealth_monitor.json) | Microsoft.RecoveryServices/Vaults | Microsoft.RecoveryServices/vaults/monitoringSettings.classicAlertSettings.alertsForCriticalOperations | Resource | No | Y | [Azure Monitor Alerts for Azure Backup](https://learn.microsoft.com/en-us/azure/backup/backup-azure-monitoring-built-in-monitor?tabs=recovery-services-vaults#azure-monitor-alerts-for-azure-backup)
[Move to Azure Monitor Alerts](https://learn.microsoft.com/en-us/azure/backup/move-to-azure-monitor-alerts) | - - +| [Deploy RV Backup Health Monitoring Alerts](../../../services/RecoveryServices/vaults/Modify-RSV-BackupHealth-Alert.json) | Microsoft.RecoveryServices/Vaults | Microsoft.RecoveryServices/vaults/monitoringSettings.classicAlertSettings.alertsForCriticalOperations | Resource | No | Y | [Azure Monitor Alerts for Azure Backup](https://learn.microsoft.com/en-us/azure/backup/backup-azure-monitoring-built-in-monitor?tabs=recovery-services-vaults#azure-monitor-alerts-for-azure-backup)
[Move to Azure Monitor Alerts](https://learn.microsoft.com/en-us/azure/backup/move-to-azure-monitor-alerts) | diff --git a/docs/layouts/shortcodes/alertList.html b/docs/layouts/shortcodes/alertList.html index d139e49ad..24dd23aaa 100644 --- a/docs/layouts/shortcodes/alertList.html +++ b/docs/layouts/shortcodes/alertList.html @@ -70,7 +70,7 @@

Deployment Templates:

{{ range .deployments }} - + {{ $url := path.Join $.Page.File.Dir .template }}
{{ .description }}{{ .name }} View | Download diff --git a/docs/layouts/shortcodes/alzActivityLogAdministrativeAlerts.html b/docs/layouts/shortcodes/alzActivityLogAdministrativeAlerts.html new file mode 100644 index 000000000..8bbd4c1c1 --- /dev/null +++ b/docs/layouts/shortcodes/alzActivityLogAdministrativeAlerts.html @@ -0,0 +1,73 @@ +
+ + + + + + + + + + + + +{{ range $category, $types := $.Site.Data }} + {{ range $type, $rules := $types }} + {{ range $rules.alerts }} + {{ if or (eq .visible true) (eq $.Site.Params.ambaDevMode true) }} + {{ if and (eq .type "ActivityLog") (in .tags "alz") (eq .properties.category "Administrative") }} + {{ $data := newScratch }} + {{ if isset . "deployments" }} + {{ range where .deployments "type" "Policy" }} + {{ if and (in .tags "alz") }} + {{ $data.Set "name" .name }} + {{ $data.Set "url" (relURL (path.Join "services" $category $type .template)) }} + {{ $data.Set "scope" .properties.scope }} + {{ $data.Set "policyScope" .properties.policyScope }} + {{ $data.Set "documented" .properties.documented }} + {{ $data.Set "alertName" .properties.alertName }} + {{ end }} + {{ end }} + {{ end }} + + + + + + + + + + + + {{ end }} + {{ end }} + {{ end }} + {{ end }} +{{ end }} + +
Alert Policy NameAlert NamePolicyScopecategoryoperationNamestatusScopeDocumentedReferences
+ {{ $data.Get "name" }} + + {{ $data.Get "alertName" }} + + {{ $data.Get "policyScope" }} + + {{ .properties.category }} + + {{ .properties.operationName }} + + {{ .properties.status }} + + {{ $data.Get "scope" }} + + {{ if ($data.Get "documented") }} + Yes + {{ else }} + No + {{ end }} + + {{ range .references }} + {{ .name }} + {{ end }} +
diff --git a/docs/layouts/shortcodes/alzActivityLogResourceHealthAlerts.html b/docs/layouts/shortcodes/alzActivityLogResourceHealthAlerts.html new file mode 100644 index 000000000..bda265aa7 --- /dev/null +++ b/docs/layouts/shortcodes/alzActivityLogResourceHealthAlerts.html @@ -0,0 +1,72 @@ +
+ + + + + + + + + + + + +{{ range $category, $types := $.Site.Data }} + {{ range $type, $rules := $types }} + {{ range $rules.alerts }} + {{ if or (eq .visible true) (eq $.Site.Params.ambaDevMode true) }} + {{ if and (eq .type "ActivityLog") (in .tags "alz") (eq .properties.category "ResourceHealth") }} + {{ $data := newScratch }} + {{ if isset . "deployments" }} + {{ range where .deployments "type" "Policy" }} + {{ if and (in .tags "alz") }} + {{ $data.Set "name" .name }} + {{ $data.Set "url" (relURL (path.Join "services" $category $type .template)) }} + {{ $data.Set "scope" .properties.scope }} + {{ $data.Set "policyScope" .properties.policyScope }} + {{ $data.Set "alertName" .properties.alertName }} + {{ end }} + {{ end }} + {{ end }} + + + + + + + + + + + + {{ end }} + {{ end }} + {{ end }} + {{ end }} +{{ end }} + +
Alert Policy NameAlert NametargetScopeCategoryProperties.causeProperties.currentHealthStatusScopeVerifiedReferences
+ {{ $data.Get "name" }} + + {{ $data.Get "alertName" }} + + {{ $data.Get "policyScope" }} + + {{ .properties.category }} + + {{ transform.Highlight (encoding.Jsonify (dict "indent" " ") .properties.causes) "json" }} + + {{ transform.Highlight (encoding.Jsonify (dict "indent" " ") .properties.currentHealthStatus) "json" }} + + {{ $data.Get "scope" }} + + {{ if .verified }} + Y + {{ else }} + N + {{ end }} + + {{ range .references }} + {{ .name }} + {{ end }} +
diff --git a/docs/layouts/shortcodes/alzActivityLogServiceHealthAlerts.html b/docs/layouts/shortcodes/alzActivityLogServiceHealthAlerts.html new file mode 100644 index 000000000..966807435 --- /dev/null +++ b/docs/layouts/shortcodes/alzActivityLogServiceHealthAlerts.html @@ -0,0 +1,69 @@ +
+ + + + + + + + + + + +{{ range $category, $types := $.Site.Data }} + {{ range $type, $rules := $types }} + {{ range $rules.alerts }} + {{ if or (eq .visible true) (eq $.Site.Params.ambaDevMode true) }} + {{ if and (eq .type "ActivityLog") (in .tags "alz") (eq .properties.category "ServiceHealth") }} + {{ $data := newScratch }} + {{ if isset . "deployments" }} + {{ range where .deployments "type" "Policy" }} + {{ if and (in .tags "alz") }} + {{ $data.Set "name" .name }} + {{ $data.Set "url" (relURL (path.Join "services" $category $type .template)) }} + {{ $data.Set "scope" .properties.scope }} + {{ $data.Set "policyScope" .properties.policyScope }} + {{ $data.Set "documented" .properties.documented }} + {{ $data.Set "alertName" .properties.alertName }} + {{ end }} + {{ end }} + {{ end }} + + + + + + + + + + + {{ end }} + {{ end }} + {{ end }} + {{ end }} +{{ end }} + +
Alert Policy NameAlert NamePolicyScopeCategoryProperties.incidentTypeScopeDocumentedReferences
+ {{ $data.Get "name" }} + + {{ $data.Get "alertName" }} + + {{ $data.Get "policyScope" }} + + {{ .properties.category }} + + {{ .properties.incidentType }} + + {{ $data.Get "scope" }} + + {{ if ($data.Get "documented") }} + Yes + {{ else }} + No + {{ end }} + + {{ range .references }} + {{ .name }} + {{ end }} +
diff --git a/docs/layouts/shortcodes/alzMetricAlerts.html b/docs/layouts/shortcodes/alzMetricAlerts.html new file mode 100644 index 000000000..b5f96e170 --- /dev/null +++ b/docs/layouts/shortcodes/alzMetricAlerts.html @@ -0,0 +1,95 @@ +
+ + + + + + + + + + + + + + + + +{{ range $category, $types := $.Site.Data }} + {{ range $type, $rules := $types }} + {{ range $rules.alerts }} + {{ if or (eq .visible true) (eq $.Site.Params.ambaDevMode true) }} + {{ if and (eq .type "Metric") (in .tags "alz") }} + {{ $data := newScratch }} + {{ if isset . "deployments" }} + {{ range where .deployments "type" "Policy" }} + {{ if and (in .tags "alz") }} + {{ $data.Set "name" .name }} + {{ $data.Set "url" (relURL (path.Join "services" $category $type .template)) }} + {{ $data.Set "scope" .properties.scope }} + {{ $data.Set "multiResource" .properties.multiResource }} + {{ end }} + {{ end }} + {{ end }} + + + + + + + + + + + + + + + + {{ end }} + {{ end }} + {{ end }} + {{ end }} +{{ end }} + +
Alert NameComponentMetricAggregationOperatorThresholdWindowFrequencySeverityScopeSupport for Multiple ResourcesVerifiedReferences
+ {{ $data.Get "name" }} + + {{ .properties.metricNamespace }} + + {{ .properties.metricName }} + + {{ .properties.timeAggregation }} + + {{ .properties.operator }} + + {{ if eq .properties.criterionType "DynamicThresholdCriterion" }} + dynamic + {{ else }} + {{ .properties.threshold }} + {{ end }} + + {{ .properties.windowSize }} + + {{ .properties.evaluationFrequency }} + + {{ .properties.severity }} + + {{ $data.Get "scope" }} + + {{ if ($data.Get "multiResource") }} + Yes + {{ else }} + No + {{ end }} + + {{ if .verified }} + Y + {{ else }} + N + {{ end }} + + {{ range .references }} + {{ .name }} + {{ end }} +
diff --git a/docs/layouts/shortcodes/alzVMInsightsLogAlerts.html b/docs/layouts/shortcodes/alzVMInsightsLogAlerts.html new file mode 100644 index 000000000..f423a93f8 --- /dev/null +++ b/docs/layouts/shortcodes/alzVMInsightsLogAlerts.html @@ -0,0 +1,95 @@ +
+ + + + + + + + + + + + + + + + + +{{ range $category, $types := $.Site.Data }} + {{ if ne $category "Compute" }} + {{ continue }} + {{ end }} + {{ range $type, $rules := $types }} + {{ if ne $type "virtualMachines" }} + {{ continue }} + {{ end }} + {{ range $rules.alerts }} + {{ if or (eq .visible true) (eq $.Site.Params.ambaDevMode true) }} + {{ if and (eq .type "Log") (in .tags "alz") }} + {{ $data := newScratch }} + {{ if isset . "deployments" }} + {{ range where .deployments "type" "Policy" }} + {{ if and (in .tags "alz") }} + {{ $data.Set "name" .name }} + {{ $data.Set "url" (relURL (path.Join "services" $category $type .template)) }} + {{ end }} + {{ end }} + {{ end }} + + + + + + + + + + + + + + + + + {{ end }} + {{ end }} + {{ end }} + {{ end }} +{{ end }} + +
Alert NameComponentAggregationOperatorThresholdWindowSizeFrequencyResolveTimeFailingPeriodsDimensionsSeverityQueryVerifiedReferences
+ {{ $data.Get "name" }} + + {{$category}}/{{$type}} + + {{ .properties.timeAggregation }} + + {{ .properties.operator }} + + {{ .properties.threshold }} + + {{ .properties.windowSize }} + + {{ .properties.evaluationFrequency }} + + {{ .properties.autoResolveTime }} + + {{ transform.Highlight (encoding.Jsonify (dict "indent" " ") .properties.failingPeriods) "json" }} + + {{ transform.Highlight (encoding.Jsonify (dict "indent" " ") .properties.dimensions) "json" }} + + {{ .properties.severity }} + + {{ transform.Highlight .properties.query "plaintext" }} + + {{ if .verified }} + Y + {{ else }} + N + {{ end }} + + {{ range .references }} + {{ .name }} + {{ end }} +
diff --git a/services/Automation/automationAccounts/alerts.yaml b/services/Automation/automationAccounts/alerts.yaml index 52e70b779..d6cd9cb0e 100644 --- a/services/Automation/automationAccounts/alerts.yaml +++ b/services/Automation/automationAccounts/alerts.yaml @@ -20,12 +20,19 @@ operator: Exclude values: - Completed + autoMitigate: false references: - name: Azure Automation Azure Monitor Metrics url: https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftautomationautomationaccounts deployments: - - description: Policy to audit/deploy Automation Account TotalJob Alert + - name: Deploy Automation Account TotalJob Alert template: Deploy-AA-TotalJob-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: TotalJob description: The total number of jobs type: Metric @@ -49,6 +56,7 @@ values: - failed threshold: 0.0 + autoMitigate: false - name: TotalUpdateDeploymentMachineRuns description: Total software update deployment machine runs in a software update deployment run @@ -73,6 +81,7 @@ values: - failed threshold: 0.0 + autoMitigate: false - name: TotalUpdateDeploymentRuns description: Total software update deployment runs type: Metric @@ -96,3 +105,4 @@ values: - failed threshold: 0.0 + autoMitigate: false diff --git a/services/Compute/virtualMachines/alerts.yaml b/services/Compute/virtualMachines/alerts.yaml index 0c37bf8d8..615a81e12 100644 --- a/services/Compute/virtualMachines/alerts.yaml +++ b/services/Compute/virtualMachines/alerts.yaml @@ -16,12 +16,19 @@ operator: LessThan threshold: 1000 criterionType: StaticThresholdCriterion + autoMitigate: false references: - name: Supported Metrics for Microsoft.Compute/virtualMachines url: https://learn.microsoft.com/azure/azure-monitor/reference/supported-metrics/microsoft-compute-virtualmachines-metrics deployments: - - description: Policy to audit/deploy VM Available Memory Bytes (MBytes) Alert + - name: Deploy VM Available Memory 
Bytes (MBytes) Alert template: Deploy-VM-AvailableMemory-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Data Disk Read Latency (ms) description: Log Alert for Virtual Machine Data Disk Read Latency (ms) type: Log @@ -64,9 +71,21 @@ _ResourceId, Disk ' + autoMitigate: true + autoResolve: true + autoResolveTime: '0:10:00' + references: + - name: "Monitor virtual machines with Azure Monitor: Alerts" + url: "https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules" deployments: - - description: Policy to audit/deploy VM Data Disk Read Latency Alert + - name: Deploy VM Data Disk Read Latency Alert template: Deploy-VM-DataDiskReadLatency-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false - name: Data Disk Free Space Percentage description: Log Alert for Virtual Machine Data Disk Free Space Percentage type: Log @@ -109,9 +128,21 @@ _ResourceId, Disk ' + autoMitigate: true + autoResolve: true + autoResolveTime: '0:10:00' + references: + - name: "Monitor virtual machines with Azure Monitor: Alerts" + url: "https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules" deployments: - - description: Policy to audit/deploy VM Data Disk Free Space Percentage Alert + - name: Deploy VM Data Disk Free Space Percentage Alert template: Deploy-VM-DataDiskSpace-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false - name: Data Disk Write Latency (ms) description: Log Alert for Virtual Machine Data Disk Write Latency (ms) type: Log @@ -154,9 +185,21 @@ _ResourceId, Disk ' + autoMitigate: true + autoResolve: true + autoResolveTime: '0:10:00' + references: + - name: "Monitor virtual machines with Azure Monitor: Alerts" + url: "https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules" 
deployments: - - description: Policy to audit/deploy VM Data Disk Write Latency Alert + - name: Deploy VM Data Disk Write Latency Alert template: Deploy-VM-DataDiskWriteLatency-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false - name: Heartbeat description: Log Alert for Virtual Machine Heartbeat type: Log @@ -191,12 +234,29 @@ _ResourceId ' + autoMitigate: true + autoResolve: true + autoResolveTime: '0:10:00' + references: + - name: "Monitor virtual machines with Azure Monitor: Alerts" + url: "https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules" deployments: - - description: Policy to audit/deploy VM HeartBeat Alert for all VMs in the subscription + - name: Deploy VM HeartBeat Alert for all VMs in the subscription template: Deploy-VM-HeartBeat-Alert.json - - description: Policy to audit/deploy VM HeartBeat Alert for VMs in the resource - group + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false + - name: Deploy VM HeartBeat Alert for VMs in the resource group template: Deploy-VM-HeartBeatAlertRG.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Network Read (bytes/sec) description: Log Alert for Virtual Machine Network Read (bytes/sec) type: Log @@ -237,9 +297,21 @@ _ResourceId, NetworkInterface ' + autoMitigate: true + autoResolve: true + autoResolveTime: '0:10:00' + references: + - name: "Monitor virtual machines with Azure Monitor: Alerts" + url: "https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules" deployments: - - description: Policy to audit/deploy VM Nework Read (bytes/sec) Alert + - name: Deploy VM Network Read (bytes/sec) Alert template: Deploy-VM-NetworkIn-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false - name: Network Write (bytes/sec) description: Log Alert
for Virtual Machine Network Write (bytes/sec) type: Log @@ -280,9 +352,21 @@ _ResourceId, NetworkInterface ' + autoMitigate: true + autoResolve: true + autoResolveTime: '0:10:00' + references: + - name: "Monitor virtual machines with Azure Monitor: Alerts" + url: "https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules" deployments: - - description: Policy to audit/deploy VM Nework Write (bytes/sec) Alert + - name: Deploy VM Nework Write (bytes/sec) Alert template: Deploy-VM-NetworkOut-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false - name: OS Disk Read Latency (ms) description: Log Alert for Virtual Machine Data OS Read Latency (ms) type: Log @@ -323,9 +407,21 @@ _ResourceId, Disk ' + autoMitigate: true + autoResolve: true + autoResolveTime: '0:10:00' + references: + - name: "Monitor virtual machines with Azure Monitor: Alerts" + url: "https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules" deployments: - - description: Policy to audit/deploy VM OS Disk Read Latency Alert + - name: Deploy VM OS Disk Read Latency Alert template: Deploy-VM-OSDiskReadLatency-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false - name: OS Disk Free Space Percentage description: Log Alert for Virtual Machine OS Disk Free Space Percentage type: Log @@ -366,9 +462,21 @@ _ResourceId, Disk ' + autoMitigate: true + autoResolve: true + autoResolveTime: '0:10:00' + references: + - name: "Monitor virtual machines with Azure Monitor: Alerts" + url: "https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules" deployments: - - description: Policy to audit/deploy VM OS Disk Free Space Percentage Alert + - name: Deploy VM OS Disk Free Space Percentage Alert template: Deploy-VM-OSDiskSpace-Alert.json + type: Policy + tags: + - alz + properties: + 
scope: Subscription + multiResource: false - name: OS Disk Write Latency (ms) description: Log Alert for Virtual Machine OS Disk Write Latency (ms) type: Log @@ -407,9 +515,21 @@ _ResourceId, Disk ' + autoMitigate: true + autoResolve: true + autoResolveTime: '0:10:00' + references: + - name: "Monitor virtual machines with Azure Monitor: Alerts" + url: "https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules" deployments: - - description: Policy to audit/deploy VM OS Disk Write Latency Alert + - name: Deploy VM OS Disk Write Latency Alert template: Deploy-VM-OSDiskWriteLatency-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false - name: Processor Utilization Percentage description: Log Alert for Virtual Machine Processor Utilization Percentage type: Log @@ -444,9 +564,21 @@ _ResourceId ' + autoMitigate: true + autoResolve: true + autoResolveTime: '0:10:00' + references: + - name: "Monitor virtual machines with Azure Monitor: Alerts" + url: "https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules" deployments: - - description: Policy to audit/deploy VM Processor Utilization Percentage Alert + - name: Deploy VM Processor Utilization Percentage Alert template: Deploy-VM-PercentCPU-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false - name: Available Memory Percentage description: Log Alert for Virtual Machine Available Memory Percentage type: Log @@ -485,9 +617,21 @@ 15m), Computer, _ResourceId ' + autoMitigate: true + autoResolve: true + autoResolveTime: '0:10:00' + references: + - name: "Monitor virtual machines with Azure Monitor: Alerts" + url: "https://learn.microsoft.com/en-us/azure/azure-monitor/vm/monitor-virtual-machine-alerts#common-alert-rules" deployments: - - description: Policy to audit/deploy VM Available Memory Percentage Alert + - name: Deploy VM Available 
Memory Percentage Alert template: Deploy-VM-PercentMemory-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + multiResource: false - name: Percentage CPU description: The percentage of allocated compute units that are currently in use by the Virtual Machine(s) @@ -507,6 +651,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 80.0 + autoMitigate: false - name: Data Disk IOPS Consumed Percentage description: Percentage of data disk I/Os consumed per minute. Only available on VM series that support premium storage. @@ -526,6 +671,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 95.0 + autoMitigate: false - name: OS Disk IOPS Consumed Percentage description: Percentage of operating system disk I/Os consumed per minute. Only available on VM series that support premium storage. @@ -545,6 +691,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 95.0 + autoMitigate: false - name: Available Memory Bytes description: Amount of physical memory, in bytes, immediately available for allocation to a process or for system use in the Virtual Machine @@ -564,6 +711,7 @@ operator: LessThan criterionType: StaticThresholdCriterion threshold: 1000000000.0 + autoMitigate: false - name: Network In Total description: The number of bytes received on all network interfaces by the Virtual Machine(s) (Incoming Traffic) @@ -583,6 +731,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 500000000000.0 + autoMitigate: false - name: Network Out Total description: The number of bytes out on all network interfaces by the Virtual Machine(s) (Outgoing Traffic) @@ -602,6 +751,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 200000000000.0 + autoMitigate: false - name: VmAvailabilityMetric description: Measure of Availability of Virtual machines over time. 
type: Metric @@ -620,6 +770,7 @@ operator: LessThan criterionType: StaticThresholdCriterion threshold: 1.0 + autoMitigate: false - name: OS Disk Bandwidth Consumed Percentage description: Percentage of operating system disk bandwidth consumed per minute. Only available on VM series that support premium storage. @@ -639,6 +790,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 90.0 + autoMitigate: false - name: Inbound Flows description: Inbound Flows are number of current flows in the inbound direction (traffic going into the VM) @@ -658,6 +810,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 100000.0 + autoMitigate: false - name: Outbound Flows description: Outbound Flows are number of current flows in the outbound direction (traffic going out of the VM) @@ -677,6 +830,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 100000.0 + autoMitigate: false - name: Data Disk Bandwidth Consumed Percentage description: Percentage of data disk bandwidth consumed per minute. Only available on VM series that support premium storage. @@ -696,6 +850,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 90.0 + autoMitigate: false - name: CPU Credits Remaining description: Total number of credits available to burst. Only available on B-series burstable VMs @@ -715,6 +870,7 @@ operator: LessThan criterionType: StaticThresholdCriterion threshold: 5.0 + autoMitigate: false - name: Data Disk Queue Depth description: Data Disk Queue Depth(or Queue Length) type: Metric @@ -733,6 +889,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 100.0 + autoMitigate: false - name: VM Cached IOPS Consumed Percentage description: Percentage of cached disk IOPS consumed by the VM. Only available on VM series that support premium storage. 
@@ -752,6 +909,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 90.0 + autoMitigate: false - name: OS Disk Queue Depth description: OS Disk Queue Depth(or Queue Length) type: Metric @@ -770,6 +928,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 10.0 + autoMitigate: false - name: VM Cached Bandwidth Consumed Percentage description: Percentage of cached disk bandwidth consumed by the VM. Only available on VM series that support premium storage. @@ -789,6 +948,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 90.0 + autoMitigate: false - name: VM Uncached IOPS Consumed Percentage description: Percentage of uncached disk IOPS consumed by the VM. Only available on VM series that support premium storage. @@ -808,6 +968,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 90.0 + autoMitigate: false - name: VM Uncached Bandwidth Consumed Percentage description: Percentage of uncached disk bandwidth consumed by the VM. Only available on VM series that support premium storage. @@ -827,6 +988,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 90.0 + autoMitigate: false - name: Disk Write Operations/Sec description: Disk Write IOPS type: Metric @@ -845,6 +1007,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 10.0 + autoMitigate: false - name: CPU Credits Consumed description: Total number of credits consumed by the Virtual Machine. 
Only available on B-series burstable VMs @@ -864,6 +1027,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 80.0 + autoMitigate: false - name: Data Disk Write Bytes/sec description: Bytes/Sec written to a single disk during monitoring period type: Metric @@ -882,6 +1046,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 10000000.0 + autoMitigate: false - name: OS Disk Write Bytes/sec description: Bytes/Sec written to a single disk during monitoring period for OS disk @@ -901,6 +1066,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 48000000.0 + autoMitigate: false - name: Disk Read Operations/Sec description: Disk Read IOPS type: Metric @@ -919,6 +1085,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 400.0 + autoMitigate: false - name: OS Disk Max Burst IOPS description: Maximum IOPS OS Disk can achieve with bursting type: Metric @@ -937,6 +1104,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 3500.0 + autoMitigate: false - name: Data Disk Read Operations/Sec description: Read IOPS from a single disk during monitoring period type: Metric @@ -955,6 +1123,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 100.0 + autoMitigate: false - name: Data Disk Max Burst IOPS description: Maximum IOPS Data Disk can achieve with bursting type: Metric @@ -973,6 +1142,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 3750.0 + autoMitigate: false - name: Disk Write Bytes description: Bytes written to disk during monitoring period type: Metric @@ -991,6 +1161,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 107374182400.0 + autoMitigate: false - name: OS Disk Write Operations/Sec description: Write IOPS from a single disk during monitoring period for OS disk type: Metric @@ -1009,6 +1180,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion 
threshold: 200.0 + autoMitigate: false - name: Disk Read Bytes description: Bytes read from disk during monitoring period type: Metric @@ -1027,6 +1199,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 0.0 + autoMitigate: false - name: Data Disk Write Operations/Sec description: Write IOPS from a single disk during monitoring period type: Metric @@ -1045,3 +1218,4 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 1000.0 + autoMitigate: false diff --git a/services/KeyVault/vaults/alerts.yaml b/services/KeyVault/vaults/alerts.yaml index 12d7691a1..d86acefda 100644 --- a/services/KeyVault/vaults/alerts.yaml +++ b/services/KeyVault/vaults/alerts.yaml @@ -16,8 +16,16 @@ - name: Best practices for setting up service health alerts url: https://www.microsoft.com/videoplayer/embed/RE2OtUa deployments: - - description: Policy to Deploy Activity Log Key Vault Delete Alert + - name: Deploy Activity Log Key Vault Delete Alert template: Deploy-ActivityLog-KeyVault-Del.json + type: Policy + tags: + - alz + properties: + scope: Subscription + policyScope: managementGroup + documented: false + alertName: ActivityKeyVaultDelete - name: Availability description: Vault requests availability type: Metric @@ -35,6 +43,7 @@ operator: LessThan threshold: 90 criterionType: StaticThresholdCriterion + autoMitigate: false references: - name: Monitoring KeyVault Reference url: https://docs.microsoft.com/en-us/azure/key-vault/general/monitor-key-vault-reference @@ -43,8 +52,14 @@ - name: KeyVault Insights Overview url: https://docs.microsoft.com/en-us/azure/azure-monitor/insights/key-vault-insights-overview deployments: - - description: Policy to audit/deploy KeyVault Availability Alert + - name: Deploy KeyVault Availability Alert template: Deploy-KV-Availability-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: true - name: Saturation Shoebox description: Vault capacity used type: Metric @@ -62,6 
+77,7 @@ operator: GreaterThan threshold: 75 criterionType: StaticThresholdCriterion + autoMitigate: false references: - name: Monitoring KeyVault Reference url: https://docs.microsoft.com/en-us/azure/key-vault/general/monitor-key-vault-reference @@ -70,8 +86,14 @@ - name: KeyVault Insights Overview url: https://docs.microsoft.com/en-us/azure/azure-monitor/insights/key-vault-insights-overview deployments: - - description: Policy to audit/deploy KeyVault Capacity Alert + - name: Deploy KeyVault Capacity Alert template: Deploy-KV-Capacity-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: true - name: Service API Latency description: Overall latency of service api requests type: Metric @@ -89,6 +111,7 @@ operator: GreaterThan threshold: 1000 criterionType: StaticThresholdCriterion + autoMitigate: false references: - name: Monitoring KeyVault Reference url: https://docs.microsoft.com/en-us/azure/key-vault/general/monitor-key-vault-reference @@ -97,8 +120,14 @@ - name: KeyVault Insights Overview url: https://docs.microsoft.com/en-us/azure/azure-monitor/insights/key-vault-insights-overview deployments: - - description: Policy to audit/deploy KeyVault Latency Alert + - name: Deploy KeyVault Latency Alert template: Deploy-KV-Latency-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: true - name: Service API Result description: Number of total service api results type: Metric @@ -119,6 +148,7 @@ failingPeriods: numberOfEvaluationPeriods: 4 minFailingPeriodsToAlert: 4 + autoMitigate: false references: - name: Monitoring KeyVault Reference url: https://docs.microsoft.com/en-us/azure/key-vault/general/monitor-key-vault-reference @@ -127,8 +157,14 @@ - name: KeyVault Insights Overview url: https://docs.microsoft.com/en-us/azure/azure-monitor/insights/key-vault-insights-overview deployments: - - description: Policy to audit/deploy KeyVault Requests Alert + - name: Deploy KeyVault Requests Alert 
template: Deploy-KV-Requests-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: true - name: Availability description: Vault requests availability type: Metric @@ -147,6 +183,7 @@ operator: LessThan criterionType: StaticThresholdCriterion threshold: 100.0 + autoMitigate: false - name: ServiceApiLatency description: Overall latency of service api requests type: Metric @@ -165,6 +202,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 1000.0 + autoMitigate: false - name: SaturationShoebox description: Vault capacity used type: Metric @@ -183,6 +221,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 75.0 + autoMitigate: false - name: ServiceApiHit description: Number of total service api hits type: Metric @@ -201,6 +240,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 100.0 + autoMitigate: false - name: ServiceApiResult description: Number of total service api results type: Metric @@ -224,3 +264,4 @@ values: - '429' threshold: 0.0 + autoMitigate: false diff --git a/services/Network/applicationGateways/alerts.yaml b/services/Network/applicationGateways/alerts.yaml index 8fedce365..8ec0aed8e 100644 --- a/services/Network/applicationGateways/alerts.yaml +++ b/services/Network/applicationGateways/alerts.yaml @@ -23,10 +23,16 @@ failingPeriods: numberOfEvaluationPeriods: 2 minFailingPeriodsToAlert: 2 + autoMitigate: false deployments: - - description: Policy to audit/deploy Azure Application Gateway ApplicationGatewayTotalTime - Alert + - name: Deploy Azure Application Gateway ApplicationGatewayTotalTime Alert template: Deploy-AGW-ApplicationGatewayTotalTime-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Backend Last Byte Response Time description: Time interval between start of establishing a connection to backend server and receiving the last byte of the response body @@ -48,10 +54,16 @@ 
failingPeriods: numberOfEvaluationPeriods: 2 minFailingPeriodsToAlert: 2 + autoMitigate: false deployments: - - description: Policy to audit/deploy Azure Application Gateway BackendLastByteResponseTime - Alert + - name: Deploy Azure Application Gateway BackendLastByteResponseTime Alert template: Deploy-AGW-BackendLastByteResponseTime-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Capacity Units description: Capacity Units consumed type: Metric @@ -69,9 +81,16 @@ operator: GreaterThan threshold: 75 criterionType: StaticThresholdCriterion + autoMitigate: false deployments: - - description: Policy to audit/deploy Azure Application Gateway CapacityUnits Alert + - name: Deploy Azure Application Gateway CapacityUnits Alert template: Deploy-AGW-CapacityUnits-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Compute Units description: Compute Units consumed type: Metric @@ -89,9 +108,16 @@ operator: GreaterThan threshold: 75 criterionType: StaticThresholdCriterion + autoMitigate: false deployments: - - description: Policy to audit/deploy Azure Application Gateway CapacityUnits Alert + - name: Deploy Azure Application Gateway CapacityUnits Alert template: Deploy-AGW-CapacityUnits-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Cpu Utilization description: Current CPU utilization of the Application Gateway type: Metric @@ -109,10 +135,16 @@ operator: GreaterThan threshold: 80 criterionType: StaticThresholdCriterion + autoMitigate: false deployments: - - description: Policy to audit/deploy Azure Application Gateway CPU Utilization - Alert + - name: Deploy Azure Application Gateway CPU Utilization Alert template: Deploy-AGW-CPUUtil-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Failed Requests description: Count of failed requests that Application 
Gateway has served type: Metric @@ -133,9 +165,16 @@ failingPeriods: numberOfEvaluationPeriods: 2 minFailingPeriodsToAlert: 2 + autoMitigate: false deployments: - - description: Policy to audit/deploy Azure Application Gateway FailedRequests Alert + - name: Deploy Azure Application Gateway FailedRequests Alert template: Deploy-AGW-FailedRequests-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Response Status description: Http response status returned by Application Gateway type: Metric @@ -162,9 +201,16 @@ failingPeriods: numberOfEvaluationPeriods: 2 minFailingPeriodsToAlert: 2 + autoMitigate: false deployments: - - description: Policy to audit/deploy Azure Application Gateway ResponseStatus Alert + - name: Deploy Azure Application Gateway ResponseStatus Alert template: Deploy-AGW-ResponseStatus-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Unhealthy Host Count description: Number of unhealthy backend hosts type: Metric @@ -182,10 +228,16 @@ operator: GreaterThan threshold: 20 criterionType: StaticThresholdCriterion + autoMitigate: false deployments: - - description: Policy to audit/deploy Azure Application Gateway Unhealthy Host Count - Alert + - name: Deploy Azure Application Gateway Unhealthy Host Count Alert template: Deploy-AGW-UnhealthyHostCount-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: UnhealthyHostCount description: Number of unhealthy backend hosts type: Metric @@ -204,6 +256,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 0.0 + autoMitigate: false - name: FailedRequests description: Count of failed requests that Application Gateway has served type: Metric @@ -222,6 +275,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 10.0 + autoMitigate: false - name: HealthyHostCount description: Number of healthy backend hosts type: 
Metric @@ -240,6 +294,7 @@ operator: LessThan criterionType: StaticThresholdCriterion threshold: 1.0 + autoMitigate: false - name: ResponseStatus description: Http response status returned by Application Gateway type: Metric @@ -263,6 +318,7 @@ values: - 5xx threshold: 10.0 + autoMitigate: false - name: ApplicationGatewayTotalTime description: Time that it takes for a request to be processed and its response to be sent. This is the interval from the time when Application Gateway receives @@ -286,6 +342,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 1000.0 + autoMitigate: false - name: CapacityUnits description: Capacity Units consumed type: Metric @@ -304,6 +361,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 15.0 + autoMitigate: false - name: BackendResponseStatus description: The number of HTTP response codes generated by the backend members. This does not include any response codes generated by the Application Gateway. @@ -328,6 +386,7 @@ values: - 5xx threshold: 15000.0 + autoMitigate: false - name: CpuUtilization description: Current CPU utilization of the Application Gateway type: Metric @@ -346,6 +405,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 80.0 + autoMitigate: false - name: BackendConnectTime description: Time spent establishing a connection with a backend server type: Metric @@ -364,6 +424,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 10000.0 + autoMitigate: false - name: ComputeUnits description: Compute Units consumed type: Metric @@ -382,6 +443,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 7.5 + autoMitigate: false - name: BackendLastByteResponseTime description: Time interval between start of establishing a connection to backend server and receiving the last byte of the response body @@ -401,6 +463,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 8000.0 + 
autoMitigate: false - name: Throughput description: Number of bytes per second the Application Gateway has served type: Metric @@ -419,6 +482,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 167772160.0 + autoMitigate: false - name: CurrentConnections description: Count of current connections established with Application Gateway type: Metric @@ -437,6 +501,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 10000.0 + autoMitigate: false - name: TotalRequests description: Count of successful requests that Application Gateway has served type: Metric @@ -455,6 +520,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 20000.0 + autoMitigate: false - name: NewConnectionsPerSecond description: New connections per second established with Application Gateway type: Metric @@ -473,6 +539,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 10000.0 + autoMitigate: false - name: BackendFirstByteResponseTime description: Time interval between start of establishing a connection to backend server and receiving the first byte of the response header, approximating processing @@ -493,6 +560,7 @@ operator: GreaterThanOrEqual criterionType: StaticThresholdCriterion threshold: 5000.0 + autoMitigate: false - name: EstimatedBilledCapacityUnits description: Estimated capacity units that will be charged type: Metric @@ -511,6 +579,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 20.0 + autoMitigate: false - name: AvgRequestCountPerHealthyHost description: Average request count per minute per healthy backend host in a pool type: Metric @@ -529,3 +598,4 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 20000.0 + autoMitigate: false diff --git a/services/Network/azureFirewalls/alerts.yaml b/services/Network/azureFirewalls/alerts.yaml index e1c88848f..ee26291a0 100644 --- a/services/Network/azureFirewalls/alerts.yaml +++ 
b/services/Network/azureFirewalls/alerts.yaml @@ -16,8 +16,16 @@ - name: Best practices for setting up service health alerts url: https://www.microsoft.com/videoplayer/embed/RE2OtUa deployments: - - description: Policy to Deploy Activity Log Azure Firewall Delete Alert + - name: Deploy Activity Log Azure Firewall Delete Alert template: Deploy-ActivityLog-AzureFirewall-Del.json + type: Policy + tags: + - alz + properties: + scope: Resource + policyScope: managementGroup + documented: false + alertName: ActivityAzureFirewallDelete - name: Firewall Health description: Indicates the overall health of this firewall type: Metric @@ -35,12 +43,19 @@ operator: LessThan threshold: 90 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: Overview of Azure Firewall logs and metrics - url: https://docs.microsoft.com/en-us/azure/firewall/logs-and-metrics#metrics + - name: Overview of Azure Firewall logs and metrics + url: https://docs.microsoft.com/en-us/azure/firewall/logs-and-metrics#metrics deployments: - - description: Policy to audit/deploy Azure Firewall FirewallHealth Alert + - name: Deploy AFW FirewallHealth Alert template: Deploy-AFW-FirewallHealth-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: SNAT Port Utilization description: Percentage of outbound SNAT ports currently in use type: Metric @@ -58,12 +73,19 @@ operator: LessThan threshold: 80 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: Overview of Azure Firewall logs and metrics - url: https://docs.microsoft.com/en-us/azure/firewall/logs-and-metrics#metrics + - name: Overview of Azure Firewall logs and metrics + url: https://docs.microsoft.com/en-us/azure/firewall/logs-and-metrics#metrics deployments: - - description: Policy to audit/deploy Azure Firewall SNATPortUtilization Alert + - name: Deploy AFW SNATPortUtilization Alert template: Deploy-AFW-SNATPortUtilization-Alert.json + type: Policy 
+ tags: + - alz + properties: + scope: Resource + multiResource: false - name: FirewallHealth description: Indicates the overall health of this firewall type: Metric @@ -82,6 +104,7 @@ operator: LessThan criterionType: StaticThresholdCriterion threshold: 100.0 + autoMitigate: false - name: SNATPortUtilization description: Percentage of outbound SNAT ports currently in use type: Metric @@ -100,6 +123,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 99.0 + autoMitigate: false - name: Throughput description: Throughput processed by this firewall type: Metric @@ -118,3 +142,4 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 27000000000.0 + autoMitigate: false diff --git a/services/Network/expressRouteCircuits/alerts.yaml b/services/Network/expressRouteCircuits/alerts.yaml index f9ccabe2e..3812ea1bc 100644 --- a/services/Network/expressRouteCircuits/alerts.yaml +++ b/services/Network/expressRouteCircuits/alerts.yaml @@ -15,14 +15,21 @@ operator: LessThan threshold: 90 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: Monitor ExpressRoute Alerts - url: https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#alerts - - name: ExpressRoute KQL Queries - url: https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#sample-kusto-queries + - name: Monitor ExpressRoute Alerts + url: https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#alerts + - name: ExpressRoute KQL Queries + url: https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#sample-kusto-queries deployments: - - description: Policy to audit/deploy ExpressRoute Circuits ARP Availability Alert + - name: Deploy ExpressRoute Circuits ARP Availability Alert template: Deploy-ERCIR-ARPAvailability-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: BGP Availability description: BGP Availability from MSEE towards 
all peers. type: Metric @@ -40,14 +47,21 @@ operator: LessThan threshold: 90 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: Monitor ExpressRoute Alerts - url: https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#alerts - - name: ExpressRoute KQL Queries - url: https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#sample-kusto-queries + - name: Monitor ExpressRoute Alerts + url: https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#alerts + - name: ExpressRoute KQL Queries + url: https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#sample-kusto-queries deployments: - - description: Policy to audit/deploy ExpressRoute Circuits BGP Availability Alert + - name: Deploy ExpressRoute Circuits BGP Availability Alert template: Deploy-ERCIR-BGPAvailability-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: QOS Drop Bits In Per Second description: Ingress bits of data dropped per second type: Metric @@ -68,15 +82,21 @@ failingPeriods: numberOfEvaluationPeriods: 4 minFailingPeriodsToAlert: 4 + autoMitigate: false references: - - name: Monitor ExpressRoute Alerts - url: https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#alerts - - name: ExpressRoute KQL Queries - url: https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#sample-kusto-queries + - name: Monitor ExpressRoute Alerts + url: https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#alerts + - name: ExpressRoute KQL Queries + url: https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#sample-kusto-queries deployments: - - description: Policy to audit/deploy ExpressRoute Circuits QosDropBitsInPerSecond - Alert + - name: Deploy ExpressRoute Circuits QosDropBitsInPerSecond Alert template: Deploy-ERCIR-QOSDropsBitsIn-Alert.json + type: Policy + tags: + - alz + properties: + scope: 
Resource + multiResource: false - name: QOS Drop Bits Out Per Second description: Egress bits of data dropped per second type: Metric @@ -97,15 +117,21 @@ failingPeriods: numberOfEvaluationPeriods: 4 minFailingPeriodsToAlert: 4 + autoMitigate: false references: - - name: Monitor ExpressRoute Alerts - url: https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#alerts - - name: ExpressRoute KQL Queries - url: https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#sample-kusto-queries + - name: Monitor ExpressRoute Alerts + url: https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#alerts + - name: ExpressRoute KQL Queries + url: https://docs.microsoft.com/en-us/azure/expressroute/monitor-expressroute#sample-kusto-queries deployments: - - description: Policy to audit/deploy ExpressRoute Circuits QosDropBitsOutPerSecond - Alert + - name: Deploy ExpressRoute Circuits QosDropBitsOutPerSecond Alert template: Deploy-ERCIR-QOSDropsBitsOut-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: BgpAvailability description: BGP Availability from MSEE towards all peers. type: Metric @@ -124,6 +150,7 @@ operator: LessThan criterionType: StaticThresholdCriterion threshold: 100.0 + autoMitigate: false - name: BitsInPerSecond description: Bits ingressing Azure per second type: Metric @@ -142,6 +169,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 800000000.0 + autoMitigate: false - name: BitsOutPerSecond description: Bits egressing Azure per second type: Metric @@ -160,6 +188,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 800000000.0 + autoMitigate: false - name: ArpAvailability description: ARP Availability from MSEE towards all peers. 
type: Metric @@ -178,6 +207,7 @@ operator: LessThan criterionType: StaticThresholdCriterion threshold: 100.0 + autoMitigate: false - name: QosDropBitsInPerSecond description: Ingress bits of data dropped per second type: Metric @@ -196,6 +226,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 0.0 + autoMitigate: false - name: QosDropBitsOutPerSecond description: Egress bits of data dropped per second type: Metric @@ -214,3 +245,4 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 0.0 + autoMitigate: false diff --git a/services/Network/expressRouteGateways/alerts.yaml b/services/Network/expressRouteGateways/alerts.yaml index 7ac927bed..1d8967066 100644 --- a/services/Network/expressRouteGateways/alerts.yaml +++ b/services/Network/expressRouteGateways/alerts.yaml @@ -1,8 +1,8 @@ - name: ER Gateway Connection Bits In Per Second description: Metric Alert for ER Gateway Connection BitsInPerSecond type: Metric - verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -15,17 +15,24 @@ operator: LessThan threshold: 1 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: ExpressRoute Monitoring Metrics Alerts for ExpressRoute Gateways - url: https://learn.microsoft.com/en-us/azure/expressroute/expressroute-monitoring-metrics-alerts#expressroute-gateways + - name: ExpressRoute Monitoring Metrics Alerts for ExpressRoute Gateways + url: https://learn.microsoft.com/en-us/azure/expressroute/expressroute-monitoring-metrics-alerts#expressroute-gateways deployments: - - description: Policy to audit/deploy ER Gateway Connection BitsInPerSecond Alert + - name: Deploy ERG ExpressRoute Bits In Alert template: Deploy-ERG-BitsInPerSecond-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: ER Gateway Connection Bits Out Per Second description: Metric Alert for ER Gateway Connection BitsOutPerSecond type: Metric - 
verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -38,17 +45,24 @@ operator: LessThan threshold: 1 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: ExpressRoute Monitoring Metrics Alerts for ExpressRoute Gateways - url: https://learn.microsoft.com/en-us/azure/expressroute/expressroute-monitoring-metrics-alerts#expressroute-gateways + - name: ExpressRoute Monitoring Metrics Alerts for ExpressRoute Gateways + url: https://learn.microsoft.com/en-us/azure/expressroute/expressroute-monitoring-metrics-alerts#expressroute-gateways deployments: - - description: Policy to audit/deploy ER Gateway Connection BitsOutPerSecond Alert + - name: Deploy ERG ExpressRoute Bits Out Alert template: Deploy-ERG-BitsOutPerSecond-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: ExpressRoute Gateway CPU Utilization description: Metric Alert for ER Gateway Express Route CPU Utilization type: Metric - verified: True - visible: True + verified: true + visible: true tags: - alz properties: @@ -61,9 +75,16 @@ operator: GreaterThan threshold: 80 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: ExpressRoute Monitoring Metrics Alerts for ExpressRoute Gateways - url: https://learn.microsoft.com/en-us/azure/expressroute/expressroute-monitoring-metrics-alerts#expressroute-gateways + - name: ExpressRoute Monitoring Metrics Alerts for ExpressRoute Gateways + url: https://learn.microsoft.com/en-us/azure/expressroute/expressroute-monitoring-metrics-alerts#expressroute-gateways deployments: - - description: Policy to audit/deploy ER Gateway Express Route CPU Utilization Alert + - name: Deploy ERG ExpressRoute CPU Utilization Alert template: Deploy-ERG-CPUUtilization-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false diff --git a/services/Network/expressRoutePorts/alerts.yaml 
b/services/Network/expressRoutePorts/alerts.yaml index 29a88ba0d..4030218fb 100644 --- a/services/Network/expressRoutePorts/alerts.yaml +++ b/services/Network/expressRoutePorts/alerts.yaml @@ -1,8 +1,8 @@ - name: Port Bits In Per Second description: Metric Alert for ER Direct Connection BitsInPerSecond type: Metric - verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -15,14 +15,21 @@ operator: LessThan threshold: 1 criterionType: StaticThresholdCriterion + autoMitigate: false deployments: - - description: Policy to audit/deploy ER Direct Connection BitsInPerSecond Alert + - name: Deploy ER Direct Connection BitsInPerSecond Alert template: Deploy-ERP-BitsInPerSecond-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Port Bits Out Per Second description: Metric Alert for ER Direct Connection BitsOutPerSecond type: Metric - verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -35,14 +42,21 @@ operator: LessThan threshold: 1 criterionType: StaticThresholdCriterion + autoMitigate: false deployments: - - description: Policy to audit/deploy ER Direct Connection BitsOutPerSecond Alert + - name: Deploy ER Direct Connection BitsOutPerSecond Alert template: Deploy-ERP-BitsOutPerSecond-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Line Protocol description: Metric Alert for ER Direct Connection LineProtocol type: Metric - verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -55,14 +69,21 @@ operator: LessThan threshold: 0.9 criterionType: StaticThresholdCriterion + autoMitigate: false deployments: - - description: Policy to audit/deploy ER Direct LineProtocol Alert + - name: Deploy ER Direct LineProtocol Alert template: Deploy-ERP-LineProtocol-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: 
Rx Light Level High description: Metric Alert for ER Direct Connection RxLightLevel High type: Metric - verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -75,14 +96,21 @@ operator: GreaterThan threshold: 0 criterionType: StaticThresholdCriterion + autoMitigate: false deployments: - - description: Policy to audit/deploy ER Direct RxLightLevel High Alert + - name: Deploy ER Direct RxLightLevel High Alert template: Deploy-ERP-RxLightLevelHigh-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Rx Light Level Low description: Metric Alert for ER Direct Connection RxLightLevel Low type: Metric - verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -95,14 +123,21 @@ operator: LessThan threshold: -10 criterionType: StaticThresholdCriterion + autoMitigate: false deployments: - - description: Policy to audit/deploy ER Direct RxLightLevel Low Alert + - name: Deploy ER Direct RxLightLevel Low Alert template: Deploy-ERP-RxLightLevelLow-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Tx Light Level High description: Metric Alert for ER Direct Connection TxLightLevel High type: Metric - verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -115,14 +150,21 @@ operator: GreaterThan threshold: 0 criterionType: StaticThresholdCriterion + autoMitigate: false deployments: - - description: Policy to audit/deploy ER Direct TxLightLevel High Alert + - name: Deploy ER Direct TxLightLevel High Alert template: Deploy-ERP-TxLightLevelHigh-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Tx Light Level Low description: Metric Alert for ER Direct Connection TxLightLevel Low type: Metric - verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -135,6 +177,13 @@ operator: 
LessThan threshold: -10 criterionType: StaticThresholdCriterion + autoMitigate: false deployments: - - description: Policy to audit/deploy ER Direct TxLightLevel Low Alert + - name: Deploy ER Direct TxLightLevel Low Alert template: Deploy-ERP-TxLightLevelLow-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false diff --git a/services/Network/loadBalancers/alerts.yaml b/services/Network/loadBalancers/alerts.yaml index d0cbdae8b..035053fe3 100644 --- a/services/Network/loadBalancers/alerts.yaml +++ b/services/Network/loadBalancers/alerts.yaml @@ -15,17 +15,23 @@ operator: LessThan threshold: 90 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: Azure Monitor supported metrics by resource type - Azure Load Balancer - url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-loadbalancers-metrics - - name: Azure Load Balancer Multi-Demensional-Metrics - url: https://learn.microsoft.com/en-us/azure/load-balancer/load-balancer-standard-diagnostics#multi-dimensional-metrics - - name: Is The Data Path Up and Available for My Load-Balancer - url: https://learn.microsoft.com/en-us/azure/load-balancer/load-balancer-standard-diagnostics#is-the-data-path-up-and-available-for-my-load-balancer-frontend + - name: Azure Monitor supported metrics by resource type - Azure Load Balancer + url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-loadbalancers-metrics + - name: Azure Load Balancer Multi-Dimensional-Metrics + url: https://learn.microsoft.com/en-us/azure/load-balancer/load-balancer-standard-diagnostics#multi-dimensional-metrics + - name: Is The Data Path Up and Available for My Load-Balancer + url: https://learn.microsoft.com/en-us/azure/load-balancer/load-balancer-standard-diagnostics#is-the-data-path-up-and-available-for-my-load-balancer-frontend deployments: - - description: Policy to audit/deploy
Azure Load Balancer Data Path Availability - Alert + - name: Deploy ALB Data Path Availability Alert template: Deploy-LB-DatapathAvailability-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Global Backend Availability description: Metric Alert for Global Backend Availability type: Metric @@ -43,13 +49,19 @@ operator: LessThan threshold: 90 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: Azure Monitor supported metrics by resource type - Azure Load Balancer - url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-loadbalancers-metrics + - name: Azure Monitor supported metrics by resource type - Azure Load Balancer + url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-loadbalancers-metrics deployments: - - description: Policy to audit/deploy Azure Load Balancer Global Backend Availability - Alert + - name: Deploy ALB Global Backend Availability Alert template: Deploy-LB-GlobalBackendAvailability-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Dip Availability description: Average Load Balancer health probe status per time duration type: Metric @@ -67,14 +79,21 @@ operator: LessThan threshold: 90 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: Azure Monitor supported metrics by resource type - Azure Load Balancer - url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-loadbalancers-metrics - - name: Are Backend Instances for my Load-Balancer Responding to Probes - url: https://learn.microsoft.com/en-us/azure/load-balancer/load-balancer-standard-diagnostics#are-the-backend-instances-for-my-load-balancer-responding-to-probes + - name: Azure Monitor supported metrics by resource type - Azure Load Balancer + url: 
https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-loadbalancers-metrics + - name: Are Backend Instances for my Load-Balancer Responding to Probes + url: https://learn.microsoft.com/en-us/azure/load-balancer/load-balancer-standard-diagnostics#are-the-backend-instances-for-my-load-balancer-responding-to-probes deployments: - - description: Policy to audit/deploy Azure Load Balancer Health Probe Status Alert + - name: Deploy ALB Health Probe Status Alert template: Deploy-LB-HealthProbeStatus-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Used SNAT Ports description: Metric Alert for ALB Used SNAT Ports type: Metric @@ -92,16 +111,23 @@ operator: GreaterThan threshold: 900 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: Azure Monitor supported metrics by resource type - Azure Load Balancer - url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-loadbalancers-metrics - - name: Load-Balancer Alerts - url: https://learn.microsoft.com/en-us/azure/load-balancer/monitor-load-balancer#alerts - - name: Check My SNAT Port Usage and Allocation - url: https://learn.microsoft.com/en-us/azure/load-balancer/load-balancer-standard-diagnostics#how-do-i-check-my-snat-port-usage-and-allocation + - name: Azure Monitor supported metrics by resource type - Azure Load Balancer + url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-loadbalancers-metrics + - name: Load-Balancer Alerts + url: https://learn.microsoft.com/en-us/azure/load-balancer/monitor-load-balancer#alerts + - name: Check My SNAT Port Usage and Allocation + url: https://learn.microsoft.com/en-us/azure/load-balancer/load-balancer-standard-diagnostics#how-do-i-check-my-snat-port-usage-and-allocation deployments: - - description: Policy to audit/deploy Azure Load Balancer Used SNAT 
Ports Alert + - name: Deploy ALB Used SNAT Ports Alert template: Deploy-LB-UsedSNATPorts-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: DipAvailability description: Average Load Balancer health probe status per time duration type: Metric @@ -120,6 +146,7 @@ operator: LessThan criterionType: StaticThresholdCriterion threshold: 51.0 + autoMitigate: false - name: UsedSnatPorts description: Total number of SNAT ports used within time period type: Metric @@ -143,6 +170,7 @@ values: - '*' threshold: 900.0 + autoMitigate: false - name: VipAvailability description: Average Load Balancer data path availability per time duration type: Metric @@ -161,6 +189,7 @@ operator: LessThan criterionType: StaticThresholdCriterion threshold: 50.0 + autoMitigate: false - name: SnatConnectionCount description: Total number of new SNAT connections created within time period type: Metric @@ -184,3 +213,4 @@ values: - failed threshold: 0.0 + autoMitigate: false diff --git a/services/Network/networkSecurityGroups/alerts.yaml b/services/Network/networkSecurityGroups/alerts.yaml index e11727c78..870dbb35d 100644 --- a/services/Network/networkSecurityGroups/alerts.yaml +++ b/services/Network/networkSecurityGroups/alerts.yaml @@ -1,19 +1,28 @@ - name: Activity Log NSG Delete description: Activity Log Alert for NSG Delete type: ActivityLog - verified: False - visible: True + verified: false + visible: true tags: - alz properties: category: Administrative operationName: Microsoft.Network/networkSecurityGroups/delete - status: [ succeeded ] + status: + - succeeded references: - name: Activity Log Service Notifications url: https://learn.microsoft.com/azure/service-health/alerts-activity-log-service-notifications-portal - name: Best practices for setting up service health alerts url: https://www.microsoft.com/videoplayer/embed/RE2OtUa deployments: - - description: Policy to Deploy Activity Log NSG Delete Alert + - name: Policy to Deploy 
Activity Log NSG Delete Alert template: Deploy-ActivityLog-NSG-Del.json + type: Policy + tags: + - alz + properties: + scope: Resource + policyScope: managementGroup + documented: false + alertName: ActivityNSGDelete diff --git a/services/Network/privateDnsZones/alerts.yaml b/services/Network/privateDnsZones/alerts.yaml index 26df53eba..1dfdd3797 100644 --- a/services/Network/privateDnsZones/alerts.yaml +++ b/services/Network/privateDnsZones/alerts.yaml @@ -16,12 +16,19 @@ operator: GreaterThanOrEqual threshold: 80 criterionType: StaticThresholdCriterion + autoMitigate: false references: - name: Private DNS Alert Metrics url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-privatednszones-metrics deployments: - - description: Policy to audit/deploy Private DNS Zone Capacity Utilization Alert + - name: Deploy PDNSZ Capacity Utilization Alert template: Deploy-PDNSZ-CapacityUtilization-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Query Volume description: Number of queries served for a Private DNS zone type: Metric @@ -39,12 +46,19 @@ operator: GreaterThanOrEqual threshold: 500 criterionType: StaticThresholdCriterion + autoMitigate: false references: - name: Private DNS Alert Metrics url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-privatednszones-metrics deployments: - - description: Policy to audit/deploy Private DNS Zone Query Volume Alert + - name: Deploy PDNSZ Query Volume Alert template: Deploy-PDNSZ-QueryVolume-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Record Set Capacity Utilization description: Percent of Record Set capacity utilized by a Private DNS zone type: Metric @@ -62,12 +76,19 @@ operator: GreaterThanOrEqual threshold: 80 criterionType: StaticThresholdCriterion + autoMitigate: false references: - name: Private DNS Alert 
Metrics url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-privatednszones-metrics deployments: - - description: Policy to audit/deploy Private DNS Zone Record Set Capacity Alert + - name: Deploy PDNSZ Record Set Capacity Alert template: Deploy-PDNSZ-RecordSetCapacity-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Virtual Network With Registration Capacity Utilization description: Percent of Virtual Network Link with auto-registration capacity utilized by a Private DNS zone @@ -86,13 +107,19 @@ operator: GreaterThanOrEqual threshold: 80 criterionType: StaticThresholdCriterion + autoMitigate: false references: - name: Private DNS Alert Metrics url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-privatednszones-metrics deployments: - - description: Policy to audit/deploy Private DNS Zone Registration Capacity Utilization - Alert + - name: Deploy PDNSZ Registration Capacity Utilization Alert template: Deploy-PDNSZ-RegistrationCapacityUtilization-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: VirtualNetworkLinkCapacityUtilization description: Percent of Virtual Network Link capacity utilized by a Private DNS zone @@ -112,6 +139,7 @@ operator: GreaterThanOrEqual criterionType: StaticThresholdCriterion threshold: 80.0 + autoMitigate: false - name: RecordSetCapacityUtilization description: Percent of Record Set capacity utilized by a Private DNS zone type: Metric @@ -130,6 +158,7 @@ operator: GreaterThanOrEqual criterionType: StaticThresholdCriterion threshold: 80.0 + autoMitigate: false - name: VirtualNetworkWithRegistrationCapacityUtilization description: Percent of Virtual Network Link with auto-registration capacity utilized by a Private DNS zone @@ -149,6 +178,7 @@ operator: GreaterThanOrEqual criterionType: StaticThresholdCriterion threshold: 80.0 
+ autoMitigate: false - name: QueryVolume description: Number of queries served for a Private DNS zone type: Metric @@ -167,6 +197,7 @@ operator: GreaterThanOrEqual criterionType: StaticThresholdCriterion threshold: 500.0 + autoMitigate: false - name: RecordSetCount description: Number of Record Sets in a Private DNS zone type: Metric @@ -185,3 +216,4 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 18750.0 + autoMitigate: false diff --git a/services/Network/publicIPAddresses/alerts.yaml b/services/Network/publicIPAddresses/alerts.yaml index 8ee2527b3..840e67b18 100644 --- a/services/Network/publicIPAddresses/alerts.yaml +++ b/services/Network/publicIPAddresses/alerts.yaml @@ -15,14 +15,21 @@ operator: GreaterThan threshold: 8000000 criterionType: StaticThresholdCriterion + autoMitigate: false references: - name: Monitor Public IP Addresses url: https://learn.microsoft.com/en-us/azure/virtual-network/ip-services/monitor-public-ip#alerts - name: Public IP Addresses Supported Metrics url: https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkpublicipaddresses deployments: - - description: Policy to audit/deploy PIP Bytes in DDoS Attack Alert + - name: Deploy PIP Bytes in DDoS Attack Alert template: Deploy-PIP-BytesInDDOSAttack-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: If Under DDoS Attack description: Metric Alert for Public IP Address Under Attack type: Metric @@ -40,14 +47,21 @@ operator: GreaterThan threshold: 0 criterionType: StaticThresholdCriterion + autoMitigate: false references: - name: Monitor Public IP Addresses url: https://learn.microsoft.com/en-us/azure/virtual-network/ip-services/monitor-public-ip#alerts - name: Public IP Addresses Supported Metrics url: https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkpublicipaddresses deployments: - - description: Policy to 
audit/deploy PIP DDoS Attack Alert + - name: Deploy PIP DDoS Attack Alert template: Deploy-PIP-DDOSAttack-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Packets In DDoS description: Inbound packets DDoS type: Metric @@ -65,14 +79,21 @@ operator: GreaterThanOrEqual threshold: 40000 criterionType: StaticThresholdCriterion + autoMitigate: false references: - name: Monitor Public IP Addresses url: https://learn.microsoft.com/en-us/azure/virtual-network/ip-services/monitor-public-ip#alerts - name: Public IP Addresses Supported Metrics url: https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkpublicipaddresses deployments: - - description: Policy to audit/deploy PIP Packets in DDoS Attack Alert + - name: Deploy PIP Packets in DDoS Attack Alert template: Deploy-PIP-PacketsInDDOS-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: VIP Availability description: Average IP Address availability per time duration type: Metric @@ -90,14 +111,21 @@ operator: LessThan threshold: 90 criterionType: StaticThresholdCriterion + autoMitigate: false references: - name: Monitor Public IP Addresses url: https://learn.microsoft.com/en-us/azure/virtual-network/ip-services/monitor-public-ip#alerts - name: Public IP Addresses Supported Metrics url: https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkpublicipaddresses deployments: - - description: Policy to audit/deploy PIP VIP Availability Alert + - name: Deploy PIP VIP Availability Alert template: Deploy-PIP-VIPAvailability-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: IfUnderDDoSAttack description: Under DDoS attack or not type: Metric @@ -116,6 +144,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 0.0 + autoMitigate: false - name: BytesInDDoS 
description: Inbound bytes DDoS type: Metric @@ -134,6 +163,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 8000000.0 + autoMitigate: false - name: VipAvailability description: Average IP Address availability per time duration type: Metric @@ -152,6 +182,7 @@ operator: LessThan criterionType: StaticThresholdCriterion threshold: 1.0 + autoMitigate: false - name: PacketsInDDoS description: Inbound packets DDoS type: Metric @@ -170,6 +201,7 @@ operator: GreaterThanOrEqual criterionType: StaticThresholdCriterion threshold: 40000.0 + autoMitigate: false - name: TCPPacketsInDDoS description: Inbound TCP packets DDoS type: Metric @@ -188,6 +220,7 @@ operator: GreaterThanOrEqual criterionType: StaticThresholdCriterion threshold: 40000.0 + autoMitigate: false - name: TCPBytesInDDoS description: Inbound TCP bytes DDoS type: Metric @@ -206,6 +239,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 40000.0 + autoMitigate: false - name: UDPPacketsInDDoS description: Inbound UDP packets DDoS type: Metric @@ -224,6 +258,7 @@ operator: GreaterThanOrEqual criterionType: StaticThresholdCriterion threshold: 40000.0 + autoMitigate: false - name: UDPBytesInDDoS description: Inbound UDP bytes DDoS type: Metric @@ -242,3 +277,4 @@ operator: GreaterThanOrEqual criterionType: StaticThresholdCriterion threshold: 40000.0 + autoMitigate: false diff --git a/services/Network/routeTables/alerts.yaml b/services/Network/routeTables/alerts.yaml index 88b28a13e..b69d75cb9 100644 --- a/services/Network/routeTables/alerts.yaml +++ b/services/Network/routeTables/alerts.yaml @@ -1,19 +1,28 @@ - name: Activity Log Route Table Update description: Activity Log Alert for Route Table Update type: ActivityLog - verified: False - visible: True + verified: false + visible: true tags: - alz properties: category: Administrative operationName: Microsoft.Network/routeTables/routes/write - status: [ succeeded ] + status: + - succeeded references: - name: 
Activity Log Service Notifications url: https://learn.microsoft.com/azure/service-health/alerts-activity-log-service-notifications-portal - name: Best practices for setting up service health alerts url: https://www.microsoft.com/videoplayer/embed/RE2OtUa deployments: - - description: Policy to Deploy Activity Log Route Table Update Alert + - name: Deploy Activity Log Route Table Update Alert template: Deploy-ActivityLog-RouteTable-Update.json + type: Policy + tags: + - alz + properties: + scope: Resource + policyScope: managementGroup + documented: false + alertName: ActivityUDRUpdate diff --git a/services/Network/virtualNetworkGateways/alerts.yaml b/services/Network/virtualNetworkGateways/alerts.yaml index beb1f3152..4ff929948 100644 --- a/services/Network/virtualNetworkGateways/alerts.yaml +++ b/services/Network/virtualNetworkGateways/alerts.yaml @@ -15,12 +15,19 @@ operator: LessThan threshold: 1 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: Supported metrics for microsoft.network/virtualnetworkgateways - url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-virtualnetworkgateways-metrics + - name: Supported metrics for microsoft.network/virtualnetworkgateways + url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-virtualnetworkgateways-metrics deployments: - - description: Policy to audit/deploy Virtual Network Gateway Tunnel Bandwidth Alert + - name: Deploy VNetG Tunnel Bandwidth Alert template: Deploy-VNETG-BandwidthUtilization-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Tunnel Egress Bytes description: Metric Alert for VNet Gateway Tunnel Egress Bytes type: Metric @@ -38,12 +45,19 @@ operator: LessThan threshold: 1 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: Supported metrics for microsoft.network/virtualnetworkgateways 
- url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-virtualnetworkgateways-metrics + - name: Supported metrics for microsoft.network/virtualnetworkgateways + url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-virtualnetworkgateways-metrics deployments: - - description: Policy to audit/deploy Virtual Network Gateway Tunnel Egress Alert + - name: Deploy VNetG Tunnel Egress Alert template: Deploy-VNETG-Egress-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Tunnel Egress Packet Drop Count description: Metric Alert for Vnet Gateway tunnel TunnelEgressPacketDropCount type: Metric @@ -64,12 +78,19 @@ failingPeriods: numberOfEvaluationPeriods: 4 minFailingPeriodsToAlert: 4 + autoMitigate: false references: - - name: Supported metrics for microsoft.network/virtualnetworkgateways - url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-virtualnetworkgateways-metrics + - name: Supported metrics for microsoft.network/virtualnetworkgateways + url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-virtualnetworkgateways-metrics deployments: - - description: Policy to audit/deploy Vnet Gateway Egress Packet Drop Count Alert + - name: Deploy VNetG Egress Packet Drop Count Alert template: Deploy-VNETG-EgressPacketDropCount-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Tunnel Egress Packet Drop TS Mismatch description: Metric Alert for Vnet Gateway tunnel TunnelEgressPacketDropTSMismatch type: Metric @@ -90,12 +111,19 @@ failingPeriods: numberOfEvaluationPeriods: 4 minFailingPeriodsToAlert: 4 + autoMitigate: false references: - - name: Supported metrics for microsoft.network/virtualnetworkgateways - url: 
https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-virtualnetworkgateways-metrics + - name: Supported metrics for microsoft.network/virtualnetworkgateways + url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-virtualnetworkgateways-metrics deployments: - - description: Policy to audit/deploy Vnet Gateway Egress Packet Drop Mismatch Alert + - name: Deploy VNetG Egress Packet Drop Mismatch Alert template: Deploy-VNETG-EgressPacketDropMismatch-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: ExpressRoute Gateway Bits Per Second description: Metric Alert for VNet Gateway Express Route Bits Per Second type: Metric @@ -113,13 +141,19 @@ operator: LessThan threshold: 1 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: Supported metrics for microsoft.network/virtualnetworkgateways - url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-virtualnetworkgateways-metrics + - name: Supported metrics for microsoft.network/virtualnetworkgateways + url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-virtualnetworkgateways-metrics deployments: - - description: Policy to audit/deploy Virtual Network Gateway Express Route Bits - Per Second Alert + - name: Deploy VNetG ExpressRoute Bits Per Second Alert template: Deploy-VNETG-ERGBitsPerSecond-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: ExpressRoute Gateway CPU Utilization description: CPU Utilization of the ExpressRoute Gateway type: Metric @@ -137,13 +171,19 @@ operator: GreaterThan threshold: 80 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: Supported metrics for microsoft.network/virtualnetworkgateways - url: 
https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-virtualnetworkgateways-metrics + - name: Supported metrics for microsoft.network/virtualnetworkgateways + url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-virtualnetworkgateways-metrics deployments: - - description: Policy to audit/deploy Virtual Network Gateway Express Route CPU - Utilization + - name: Deploy VNetG ExpressRoute CPU Utilization Alert template: Deploy-VNETG-ERGCPUUtilization-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Tunnel Ingress Bytes description: Metric Alert for VNet Gateway Tunnel ingress Bytes type: Metric @@ -161,12 +201,19 @@ operator: LessThan threshold: 1 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: Supported metrics for microsoft.network/virtualnetworkgateways - url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-virtualnetworkgateways-metrics + - name: Supported metrics for microsoft.network/virtualnetworkgateways + url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-virtualnetworkgateways-metrics deployments: - - description: Policy to audit/deploy Virtual Network Gateway Tunnel Ingress Alert + - name: Deploy VNetG Tunnel Ingress Alert template: Deploy-VNETG-Ingress-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Tunnel Ingress Packet Drop Count description: Metric Alert for Vnet Gateway tunnel TunnelIngressPacketDropCount type: Metric @@ -187,12 +234,19 @@ failingPeriods: numberOfEvaluationPeriods: 4 minFailingPeriodsToAlert: 4 + autoMitigate: false references: - - name: Supported metrics for microsoft.network/virtualnetworkgateways - url: 
https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-virtualnetworkgateways-metrics + - name: Supported metrics for microsoft.network/virtualnetworkgateways + url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-virtualnetworkgateways-metrics deployments: - - description: Policy to audit/deploy Vnet Gateway Ingress Packet Drop Count Alert + - name: Deploy VNetG Ingress Packet Drop Count Alert template: Deploy-VNETG-IngressPacketDropCount-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Tunnel Ingress Packet Drop TS Mismatch description: Metric Alert for Vnet Gateway tunnel TunnelIngressPacketDropTSMismatch type: Metric @@ -213,13 +267,19 @@ failingPeriods: numberOfEvaluationPeriods: 4 minFailingPeriodsToAlert: 4 + autoMitigate: false references: - - name: Supported metrics for microsoft.network/virtualnetworkgateways - url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-virtualnetworkgateways-metrics + - name: Supported metrics for microsoft.network/virtualnetworkgateways + url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-virtualnetworkgateways-metrics deployments: - - description: Policy to audit/deploy Vnet Gateway Ingress Packet Drop Mismatch - Alert + - name: Deploy VNetG Ingress Packet Drop Mismatch Alert template: Deploy-VNETG-IngressPacketDropMismatch-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: TunnelAverageBandwidth description: Average bandwidth of a tunnel in bytes per second type: Metric @@ -238,6 +298,7 @@ operator: LessThan criterionType: StaticThresholdCriterion threshold: 1.0 + autoMitigate: false - name: ExpressRouteGatewayCpuUtilization description: CPU Utilization of the ExpressRoute Gateway type: Metric @@ -256,6 +317,7 @@
operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 80.0 + autoMitigate: false - name: AverageBandwidth description: Site-to-site bandwidth of a gateway in bytes per second type: Metric @@ -274,3 +336,4 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 450000000.0 + autoMitigate: false diff --git a/services/Network/virtualNetworks/alerts.yaml b/services/Network/virtualNetworks/alerts.yaml index 113c5cb32..2afa44b1c 100644 --- a/services/Network/virtualNetworks/alerts.yaml +++ b/services/Network/virtualNetworks/alerts.yaml @@ -15,12 +15,19 @@ operator: GreaterThan threshold: 0 criterionType: StaticThresholdCriterion + autoMitigate: false references: - name: Supported metrics for Microsoft.Network/virtualNetworks url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-virtualnetworks-metrics deployments: - - description: Policy to audit/deploy Virtual Network DDoS Attack Alert + - name: Deploy VNet DDoS Attack Alert template: Deploy-VNET-DDOSAttack-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: IfUnderDDoSAttack description: Under DDoS attack or not type: Metric @@ -39,3 +46,4 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 1.0 + autoMitigate: false diff --git a/services/Network/vpnGateways/alerts.yaml b/services/Network/vpnGateways/alerts.yaml index 407398e29..620035fcc 100644 --- a/services/Network/vpnGateways/alerts.yaml +++ b/services/Network/vpnGateways/alerts.yaml @@ -1,22 +1,31 @@ - name: Activity Log VPN Gateway Delete description: Activity Log Alert for VPN Gateway Delete type: ActivityLog - verified: False - visible: True + verified: false + visible: true tags: - alz properties: category: Administrative operationName: Microsoft.Network/vpnGateways/delete - status: [ succeeded ] + status: + - succeeded deployments: - - description: Policy to Deploy Activity Log VPN Gateway Delete 
Alert + - name: Deploy Activity Log VPN Gateway Delete Alert template: Deploy-ActivityLog-VPNG-Del.json + type: Policy + tags: + - alz + properties: + scope: Subscription + policyScope: managementGroup + documented: false + alertName: ActivityVPNGatewayDelete - name: Tunnel Average Bandwidth description: Metric Alert for VPN Gateway Bandwidth Utilization type: Metric - verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -29,19 +38,26 @@ operator: LessThan threshold: 1 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: Activity Log Service Notifications - url: https://learn.microsoft.com/en-us/azure/service-health/alerts-activity-log-service-notifications-portal - - name: Best practices for setting up service health alerts - url: https://www.microsoft.com/videoplayer/embed/RE2OtUa + - name: Activity Log Service Notifications + url: https://learn.microsoft.com/en-us/azure/service-health/alerts-activity-log-service-notifications-portal + - name: Best practices for setting up service health alerts + url: https://www.microsoft.com/videoplayer/embed/RE2OtUa deployments: - - description: Policy to audit/deploy VPN Gateway Bandwidth Utilization Alert + - name: Deploy VPNG Bandwidth Utilization Alert template: Deploy-VPNG-BandwidthUtilization-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: BGP Peer Status description: Metric Alert for VPN Gateway BGP peer status type: Metric - verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -54,17 +70,24 @@ operator: LessThan threshold: 1 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: Supported metrics for microsoft.network/vpngateways - url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-vpngateways-metrics + - name: Supported metrics for microsoft.network/vpngateways + url: 
https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-vpngateways-metrics deployments: - - description: Policy to audit/deploy VPN Gateway BGP Peer Status Alert + - name: Deploy VPNG BGP Peer Status Alert template: Deploy-VPNG-BGPPeerStatus-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Tunnel Egress Bytes description: Metric Alert for VPN Gateway tunnel egress bytes type: Metric - verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -77,17 +100,24 @@ operator: LessThan threshold: 1 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: Supported metrics for microsoft.network/vpngateways - url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-vpngateways-metrics + - name: Supported metrics for microsoft.network/vpngateways + url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-vpngateways-metrics deployments: - - description: Policy to audit/deploy VPN Gateway Egress Alert + - name: Deploy VPNG Egress Alert template: Deploy-VPNG-Egress-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Tunnel Egress Packet Drop Count description: Metric Alert for VPN Gateway tunnel TunnelEgressPacketDropCount type: Metric - verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -103,17 +133,24 @@ failingPeriods: numberOfEvaluationPeriods: 4 minFailingPeriodsToAlert: 4 + autoMitigate: false references: - - name: Supported metrics for microsoft.network/vpngateways - url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-vpngateways-metrics + - name: Supported metrics for microsoft.network/vpngateways + url: 
https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-vpngateways-metrics deployments: - - description: Policy to audit/deploy VPN Gateway Egress Packet Drop Count Alert + - name: Deploy VPNG Egress Packet Drop Count Alert template: Deploy-VPNG-EgressPacketDropCount-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Tunnel Egress Packet Drop TS Mismatch description: Metric Alert for VPN Gateway tunnel TunnelEgressPacketDropTSMismatch type: Metric - verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -129,17 +166,24 @@ failingPeriods: numberOfEvaluationPeriods: 4 minFailingPeriodsToAlert: 4 + autoMitigate: false references: - - name: Supported metrics for microsoft.network/vpngateways - url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-vpngateways-metrics + - name: Supported metrics for microsoft.network/vpngateways + url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-vpngateways-metrics deployments: - - description: Policy to audit/deploy VPN Gateway Egress Packet Drop Mismatch Alert + - name: Deploy VPNG Egress Packet Drop Mismatch Alert template: Deploy-VPNG-EgressPacketDropMismatch-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Tunnel Ingress Bytes description: Metric Alert for VPN Gateway tunnel ingress bytes type: Metric - verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -152,17 +196,24 @@ operator: LessThan threshold: 1 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: Supported metrics for microsoft.network/vpngateways - url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-vpngateways-metrics + - name: Supported metrics for 
microsoft.network/vpngateways + url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-vpngateways-metrics deployments: - - description: Policy to audit/deploy VPN Gateway Ingress Alert + - name: Deploy VPNG Ingress Alert template: Deploy-VPNG-Ingress-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Tunnel Ingress Packet Drop Count description: Metric Alert for VPN Gateway tunnel TunnelEgressPacketDropCount type: Metric - verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -178,17 +229,24 @@ failingPeriods: numberOfEvaluationPeriods: 4 minFailingPeriodsToAlert: 4 + autoMitigate: false references: - - name: Supported metrics for microsoft.network/vpngateways - url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-vpngateways-metrics + - name: Supported metrics for microsoft.network/vpngateways + url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-vpngateways-metrics deployments: - - description: Policy to audit/deploy VPN Gateway Ingress Packet Drop Count Alert + - name: Deploy VPNG Ingress Packet Drop Count Alert template: Deploy-VNETG-IngressPacketDropMismatch-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: Tunnel Ingress Packet Drop TS Mismatch description: Metric Alert for Vnet Gateway tunnel TunnelIngressPacketDropTSMismatch type: Metric - verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -204,9 +262,16 @@ failingPeriods: numberOfEvaluationPeriods: 4 minFailingPeriodsToAlert: 4 + autoMitigate: false references: - - name: Supported metrics for microsoft.network/vpngateways - url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-vpngateways-metrics + - name: Supported
metrics for microsoft.network/vpngateways + url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-network-vpngateways-metrics deployments: - - description: Policy to audit/deploy Vnet Gateway Ingress Packet Drop Mismatch Alert + - name: Deploy VPNG Ingress Packet Drop Mismatch Alert template: Deploy-VNETG-IngressPacketDropMismatch-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false diff --git a/services/OperationalInsights/workspaces/alerts.yaml b/services/OperationalInsights/workspaces/alerts.yaml index fdbc99387..03f891111 100644 --- a/services/OperationalInsights/workspaces/alerts.yaml +++ b/services/OperationalInsights/workspaces/alerts.yaml @@ -16,8 +16,16 @@ - name: Best practices for setting up service health alerts url: https://www.microsoft.com/videoplayer/embed/RE2OtUa deployments: - - description: Policy to Deploy Activity Log LA Workspace Delete Alert + - name: Deploy Activity Log LA Workspace Delete Alert template: Deploy-ActivityLog-LAWorkspace-Del.json + type: Policy + tags: + - alz + properties: + scope: Subscription + policyScope: managementGroup + documented: false + alertName: ActivityLAWorkspaceDelete - name: Activity Log LA Workspace Regenerate Key description: Activity Log Alert for LA Workspace Regenerate Key type: ActivityLog @@ -36,8 +44,16 @@ - name: Best practices for setting up service health alerts url: https://www.microsoft.com/videoplayer/embed/RE2OtUa deployments: - - description: Policy to Deploy Activity Log LA Workspace Regenerate Key Alert + - name: Deploy Activity Log LA Workspace Regenerate Key Alert template: Deploy-ActivityLog-LAWorkspace-KeyRegen.json + type: Policy + tags: + - alz + properties: + scope: Subscription + policyScope: managementGroup + documented: false + alertName: ActivityLAWorkspaceRegenKey - name: Average_% Free Space description: 'Average_% Free Space. Supported for: Linux, Windows. 
Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' @@ -62,6 +78,7 @@ values: - '*' threshold: 10.0 + autoMitigate: false - name: Average_% Processor Time description: 'Average_% Processor Time. Supported for: Linux, Windows. Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' @@ -86,6 +103,7 @@ values: - '*' threshold: 90.0 + autoMitigate: false - name: Average_% Used Memory description: 'Average_% Used Memory. Supported for: Linux. Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' @@ -110,6 +128,7 @@ values: - '*' threshold: 90.0 + autoMitigate: false - name: Average_% Used Space description: 'Average_% Used Space. Supported for: Linux. Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' @@ -134,6 +153,7 @@ values: - '*' threshold: 90.0 + autoMitigate: false - name: Average_% Committed Bytes In Use description: 'Average_% Committed Bytes In Use. Supported for: Windows. Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' @@ -158,6 +178,7 @@ values: - '*' threshold: 90.0 + autoMitigate: false - name: Heartbeat description: 'Heartbeat. Supported for: Linux, Windows. Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' @@ -182,6 +203,7 @@ values: - '*' threshold: 0.0 + autoMitigate: false - name: Average_Disk Transfers/sec description: 'Average_Disk Transfers/sec. Supported for: Linux, Windows. Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' @@ -206,6 +228,7 @@ values: - '*' threshold: 100.0 + autoMitigate: false - name: Average_Available MBytes description: 'Average_Available MBytes. Supported for: Windows. Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' @@ -230,6 +253,7 @@ values: - '*' threshold: 512.0 + autoMitigate: false - name: Average_Available MBytes Memory description: 'Average_Available MBytes Memory. Supported for: Linux. 
Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' @@ -254,6 +278,7 @@ values: - '*' threshold: 1.0 + autoMitigate: false - name: Average_% Available Memory description: 'Average_% Available Memory. Supported for: Linux. Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' @@ -278,6 +303,7 @@ values: - '*' threshold: 10.0 + autoMitigate: false - name: Average_Free Megabytes description: 'Average_Free Megabytes. Supported for: Linux, Windows. Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' @@ -307,6 +333,7 @@ - 'c:' - / threshold: 10240.0 + autoMitigate: false - name: Average_% Used Inodes description: 'Average_% Used Inodes. Supported for: Linux. Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' @@ -331,6 +358,7 @@ values: - '*' threshold: 80.0 + autoMitigate: false - name: Average_% Used Swap Space description: 'Average_% Used Swap Space. Supported for: Linux. Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' @@ -363,6 +391,7 @@ values: - '*' threshold: 50.0 + autoMitigate: false - name: Average_Current Disk Queue Length description: 'Average_Current Disk Queue Length. Supported for: Windows. Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' @@ -395,6 +424,7 @@ values: - '*' threshold: 40.0 + autoMitigate: false - name: Event description: 'Event. Supported for: Windows. Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' type: Metric @@ -430,6 +460,7 @@ values: - '1074' threshold: 0.0 + autoMitigate: false - name: Average_Pages/sec description: 'Average_Pages/sec. Supported for: Linux, Windows. Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' @@ -454,6 +485,7 @@ values: - '*' threshold: 5000.0 + autoMitigate: false - name: Average_Bytes Sent/sec description: 'Average_Bytes Sent/sec. Supported for: Windows. 
Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' @@ -493,6 +525,7 @@ numberOfEvaluationPeriods: 4 minFailingPeroidsToAlert: minFailingPeroidsToAlert alertSensitivity: Medium + autoMitigate: false - name: Average_Bytes Received/sec description: 'Average_Bytes Received/sec. Supported for: Windows. Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' @@ -532,6 +565,7 @@ numberOfEvaluationPeriods: 4 minFailingPeroidsToAlert: minFailingPeroidsToAlert alertSensitivity: Medium + autoMitigate: false - name: Average_Total Bytes Transmitted description: 'Average_Total Bytes Transmitted. Supported for: Linux. Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' @@ -571,6 +605,7 @@ numberOfEvaluationPeriods: 4 minFailingPeroidsToAlert: minFailingPeroidsToAlert alertSensitivity: Medium + autoMitigate: false - name: Average_Total Bytes Received description: 'Average_Total Bytes Received. Supported for: Linux. Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' @@ -610,6 +645,7 @@ numberOfEvaluationPeriods: 4 minFailingPeroidsToAlert: minFailingPeroidsToAlert alertSensitivity: Medium + autoMitigate: false - name: Average_% IO Wait Time description: 'Average_% IO Wait Time. Supported for: Linux. Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' @@ -634,6 +670,7 @@ values: - vm-pm-public-jump-001 threshold: 75.0 + autoMitigate: false - name: Average_% User Time description: 'Average_% User Time. Supported for: Linux, Windows. Part of [metric alerts for logs feature](https://aka.ms/am-log-to-metric).' 
@@ -658,3 +695,4 @@ values: - vm-pm-public-jump-001 threshold: 90.0 + autoMitigate: false diff --git a/services/RecoveryServices/alerts.yaml b/services/RecoveryServices/alerts.yaml deleted file mode 100644 index 9f72b0ce5..000000000 --- a/services/RecoveryServices/alerts.yaml +++ /dev/null @@ -1,15 +0,0 @@ -- name: Deploy RV Backup Health Monitoring Alerts - description: Deploy RecoveryVault BackupHealthMonitor Alert - type: Microsoft.RecoveryServices/vaults/monitoringSettings.azureMonitorAlertSettings - properties: - category: Site Recovery - operationName: Microsoft.RecoveryServices/vaults/monitoringSettings.azureMonitorAlertSettings.alertsForAllJobFailures" - status: [ Enabled ] - references: - - name: Azure Monitor Alerts for Azure Backup - url: https://learn.microsoft.com/azure/service-health/alerts-activity-log-service-notifications-portal - - name: Move to Azure Monitor Alerts - url: https://learn.microsoft.com/en-us/azure/backup/move-to-azure-monitor-alerts - deployments: - - description: Policy to audit/update Recovery Vault Backup Health Alerting to Azure monitor alerts - template: Modify-RSV-BackupHealth-Alert.json \ No newline at end of file diff --git a/services/RecoveryServices/vaults/alerts.yaml b/services/RecoveryServices/vaults/alerts.yaml index df9305c12..bd59bf381 100644 --- a/services/RecoveryServices/vaults/alerts.yaml +++ b/services/RecoveryServices/vaults/alerts.yaml @@ -1,3 +1,30 @@ +- name: Backup Health Monitoring + description: Deploy RecoveryVault BackupHealthMonitor Alert + type: Unknown + verified: true + visible: true + tags: + - alz + properties: + category: Site Recovery + operationName: Microsoft.RecoveryServices/vaults/monitoringSettings.azureMonitorAlertSettings.alertsForAllJobFailures + status: + - Enabled + references: + - name: Azure Monitor Alerts for Azure Backup + url: https://learn.microsoft.com/azure/service-health/alerts-activity-log-service-notifications-portal + - name: Move to Azure Monitor Alerts + url:
https://learn.microsoft.com/en-us/azure/backup/move-to-azure-monitor-alerts + deployments: + - name: Deploy RV Backup Health Monitoring Alerts + monitor alerts + template: Modify-RSV-BackupHealth-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: BackupHealthEvent description: The count of health events pertaining to backup job health type: Metric diff --git a/services/Resources/subscriptions/alerts.yaml b/services/Resources/subscriptions/alerts.yaml index f9cd2c83e..856e40a71 100644 --- a/services/Resources/subscriptions/alerts.yaml +++ b/services/Resources/subscriptions/alerts.yaml @@ -1,27 +1,39 @@ - name: Resource Health Unhealthy description: Resource Health Unhealthy Alert type: ActivityLog - verified: False - visible: True + verified: false + visible: true tags: - alz properties: category: ResourceHealth - causes: [ PlatoformInitiated, UserInitiated ] - currentHealthStatus: [ Degraded, Unavailable ] + causes: + - PlatformInitiated + - UserInitiated + currentHealthStatus: + - Degraded + - Unavailable references: - name: Resource Health url: https://learn.microsoft.com/azure/service-health/resource-health-overview - name: Best practices for setting up service health alerts url: https://www.microsoft.com/videoplayer/embed/RE2OtUa deployments: - - description: Policy to Deploy Resource Health Unhealthy Alert + - name: Deploy Resource Health Unhealthy Alert template: Deploy-ActivityLog-ResourceHealth-UnHealthly-Alert.json + type: Policy + tags: + - alz + properties: + scope: Subscription + policyScope: managementGroup + documented: true + alertName: ResourceHealthUnhealthyAlert - name: Service Health Advisory description: Service Health Advisory Alert type: ActivityLog - verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -33,13 +45,21 @@ - name: Best practices for setting up service health alerts url: https://www.microsoft.com/videoplayer/embed/RE2OtUa deployments: -
- description: Policy to Deploy Service Health Advisory Alert + - name: Deploy Service Health Advisory Alert template: Deploy-ActivityLog-ServiceHealth-Health.json + type: Policy + tags: + - alz + properties: + scope: Subscription + policyScope: managementGroup + documented: true + alertName: ServiceHealthAdvisoryEvent - name: Service Health Incident description: Service Health Incident Alert type: ActivityLog - verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -51,13 +71,21 @@ - name: Best practices for setting up service health alerts url: https://www.microsoft.com/videoplayer/embed/RE2OtUa deployments: - - description: Policy to Deploy Service Health Incident Alert + - name: Deploy Service Health Incident Alert template: Deploy-ActivityLog-ServiceHealth-Incident.json + type: Policy + tags: + - alz + properties: + scope: Subscription + policyScope: managementGroup + documented: true + alertName: ServiceHealthIncident - name: Service Health Maintenance description: Service Health Maintenance Alert type: ActivityLog - verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -69,13 +97,21 @@ - name: Best practices for setting up service health alerts url: https://www.microsoft.com/videoplayer/embed/RE2OtUa deployments: - - description: Policy to Deploy Service Health Maintenance Alert + - name: Deploy Service Health Maintenance Alert template: Deploy-ActivityLog-ServiceHealth-Maintenance.json + type: Policy + tags: + - alz + properties: + scope: Subscription + policyScope: managementGroup + documented: true + alertName: ServiceHealthPlannedMaintenance - name: Service Health Security description: Service Health Security Alert type: ActivityLog - verified: False - visible: True + verified: false + visible: true tags: - alz properties: @@ -87,5 +123,13 @@ - name: Best practices for setting up service health alerts url: https://www.microsoft.com/videoplayer/embed/RE2OtUa deployments: - - 
description: Policy to Deploy Service Health Security Alert + - name: Deploy Service Health Security Advisory Alert template: Deploy-ActivityLog-ServiceHealth-Security.json + type: Policy + tags: + - alz + properties: + scope: Subscription + policyScope: managementGroup + documented: true + alertName: ServiceHealthSecurityIncident diff --git a/services/Storage/storageAccounts/alerts.yaml b/services/Storage/storageAccounts/alerts.yaml index db7329767..2f63380d4 100644 --- a/services/Storage/storageAccounts/alerts.yaml +++ b/services/Storage/storageAccounts/alerts.yaml @@ -1,4 +1,4 @@ -- name: Deploy SA Availability Alert +- name: Availability description: The percentage of availability for the storage service or the specified API operation. Availability is calculated by taking the TotalBillableRequests value and dividing it by the number of applicable requests, including those that @@ -19,14 +19,21 @@ operator: LessThan threshold: 90 criterionType: StaticThresholdCriterion + autoMitigate: false references: - - name: Monitoring Availability - url: https://learn.microsoft.com/en-us/troubleshoot/azure/azure-storage/storage-monitoring-diagnosing-troubleshooting?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&tabs=dotnet#monitoring-availability - - name: Supported metrics for Microsoft.Storage/storageAccounts - url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-storage-storageaccounts-metrics + - name: Monitoring Availability + url: https://learn.microsoft.com/en-us/troubleshoot/azure/azure-storage/storage-monitoring-diagnosing-troubleshooting?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&tabs=dotnet#monitoring-availability + - name: Supported metrics for Microsoft.Storage/storageAccounts + url: https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/microsoft-storage-storageaccounts-metrics deployments: - - description: Policy to audit/deploy SA Availability Alert + - name: Deploy SA Availability Alert 
template: Deploy-SA-Availability-Alert.json + type: Policy + tags: + - alz + properties: + scope: Resource + multiResource: false - name: UsedCapacity description: The amount of storage used by the storage account. For standard storage accounts, it's the sum of capacity used by blob, table, file, and queue. For premium @@ -48,6 +55,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 4000000000000000.0 + autoMitigate: false - name: Egress description: The amount of egress data. This number includes egress to external client from Azure Storage as well as egress within Azure. As a result, this number @@ -68,6 +76,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 60000000.0 + autoMitigate: false - name: Ingress description: The amount of ingress data, in bytes. This number includes ingress from an external client into Azure Storage as well as ingress within Azure. @@ -87,6 +96,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 1073741824.0 + autoMitigate: false - name: BlobCapacity description: The amount of storage used by the storage account's Blob service in bytes. @@ -106,6 +116,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 107374182400.0 + autoMitigate: false - name: SuccessE2ELatency description: The average end-to-end latency of successful requests made to a storage service or the specified API operation, in milliseconds. This value includes the @@ -127,6 +138,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 1000.0 + autoMitigate: false - name: BlobCount description: The number of blob objects stored in the storage account. type: Metric @@ -145,6 +157,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 0.0 + autoMitigate: false - name: SuccessServerLatency description: The average time used to process a successful request by Azure Storage. 
This value does not include the network latency specified in SuccessE2ELatency. @@ -164,6 +177,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 1000.0 + autoMitigate: false - name: QueueMessageCount description: The number of unexpired queue messages in the storage account. type: Metric @@ -182,6 +196,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 1000.0 + autoMitigate: false - name: QueueCapacity description: The amount of Queue storage used by the storage account. type: Metric @@ -200,6 +215,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 0.0 + autoMitigate: false - name: QueueCount description: The number of queues in the storage account. type: Metric @@ -218,6 +234,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 0.0 + autoMitigate: false - name: FileCapacity description: The amount of File storage used by the storage account. type: Metric @@ -236,6 +253,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 96636764160.0 + autoMitigate: false - name: Transactions description: The number of requests made to a storage service or the specified API operation. This number includes successful and failed requests, as well as requests @@ -266,6 +284,7 @@ values: - anonymous threshold: 10.0 + autoMitigate: false - name: FileShareSnapshotCount description: The number of snapshots present on the share in storage account's Files Service. @@ -285,6 +304,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 190.0 + autoMitigate: false - name: Availability description: The percentage of availability for the storage service or the specified API operation. 
Availability is calculated by taking the TotalBillableRequests @@ -307,6 +327,7 @@ operator: LessThanOrEqual criterionType: StaticThresholdCriterion threshold: 99.9 + autoMitigate: false - name: FileShareCount description: The number of file shares in the storage account. type: Metric @@ -325,6 +346,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 0.0 + autoMitigate: false - name: FileShareCapacityQuota description: The upper limit on the amount of storage that can be used by Azure Files Service in bytes. @@ -344,3 +366,4 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 108851651149824.0 + autoMitigate: false diff --git a/services/StorageSync/storageSyncServices/alerts.yaml b/services/StorageSync/storageSyncServices/alerts.yaml index c4fe3257a..e00462038 100644 --- a/services/StorageSync/storageSyncServices/alerts.yaml +++ b/services/StorageSync/storageSyncServices/alerts.yaml @@ -17,6 +17,7 @@ operator: LessThan criterionType: StaticThresholdCriterion threshold: 1.0 + autoMitigate: false - name: StorageSyncSyncSessionPerItemErrorsCount description: Count of files failed to sync type: Metric @@ -35,6 +36,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 100.0 + autoMitigate: false - name: ServerSyncSessionResult description: Metric that logs a value of 1 each time the Server Endpoint successfully completes a Sync Session with the Cloud Endpoint @@ -54,3 +56,4 @@ operator: LessThan criterionType: StaticThresholdCriterion threshold: 1.0 + autoMitigate: false diff --git a/services/StreamAnalytics/streamingjobs/alerts.yaml b/services/StreamAnalytics/streamingjobs/alerts.yaml index 1f5d5af34..51be0ad94 100644 --- a/services/StreamAnalytics/streamingjobs/alerts.yaml +++ b/services/StreamAnalytics/streamingjobs/alerts.yaml @@ -16,6 +16,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 0.0 + autoMitigate: false - name: ResourceUtilization description: SU (Memory) 
% Utilization type: Metric @@ -34,6 +35,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 80.0 + autoMitigate: false - name: OutputWatermarkDelaySeconds description: Watermark Delay type: Metric @@ -52,6 +54,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 300.0 + autoMitigate: false - name: OutputEvents description: Output Events type: Metric @@ -70,6 +73,7 @@ operator: LessThanOrEqual criterionType: StaticThresholdCriterion threshold: 0.0 + autoMitigate: false - name: ConversionErrors description: Data Conversion Errors type: Metric @@ -88,6 +92,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 0.0 + autoMitigate: false - name: InputEvents description: Input Events type: Metric @@ -106,6 +111,7 @@ operator: LessThanOrEqual criterionType: StaticThresholdCriterion threshold: 0.0 + autoMitigate: false - name: DeserializationError description: Input Deserialization Errors type: Metric @@ -124,6 +130,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 0.0 + autoMitigate: false - name: InputEventsSourcesBacklogged description: Backlogged Input Events type: Metric @@ -142,6 +149,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 100000.0 + autoMitigate: false - name: ProcessCPUUsagePercentage description: CPU % Utilization type: Metric @@ -160,6 +168,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 80.0 + autoMitigate: false - name: AMLCalloutFailedRequests description: Failed Function Requests type: Metric @@ -178,3 +187,4 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 0.0 + autoMitigate: false diff --git a/services/Synapse/workspaces/alerts.yaml b/services/Synapse/workspaces/alerts.yaml index 62a814082..5d7b93705 100644 --- a/services/Synapse/workspaces/alerts.yaml +++ b/services/Synapse/workspaces/alerts.yaml @@ -22,6 +22,7 @@ values: - failed threshold: 0.0 + autoMitigate: 
false - name: BuiltinSqlPoolLoginAttempts description: Count of login attempts that succeded or failed type: Metric @@ -40,6 +41,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 360.0 + autoMitigate: false - name: BuiltinSqlPoolDataProcessedBytes description: Amount of data processed by queries type: Metric @@ -58,6 +60,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 386547056640.0 + autoMitigate: false - name: SQLStreamingRuntimeErrors description: This is a preview metric available in East US, West Europe. Total number of errors related to query processing (excluding errors found while ingesting @@ -78,6 +81,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 0.0 + autoMitigate: false - name: IntegrationTriggerRunsEnded description: Count of integration triggers that succeeded, failed, or were cancelled type: Metric @@ -101,6 +105,7 @@ values: - failed threshold: 0.0 + autoMitigate: false - name: DWUUsedPercent description: Represents a high-level representation of usage across the SQL pool. 
Measured by taking the maximum between CPU percentage and Data IO percentage @@ -120,6 +125,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 90.0 + autoMitigate: false - name: CPUPercent description: CPU utilization across all nodes in the SQL pool type: Metric @@ -138,6 +144,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 90.0 + autoMitigate: false - name: MemoryUsedPercent description: Memory utilization across all nodes in the SQL pool type: Metric @@ -156,3 +163,4 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 80.0 + autoMitigate: false diff --git a/services/Web/hostingEnvironments/alerts.yaml b/services/Web/hostingEnvironments/alerts.yaml index 21de98260..ca999cabc 100644 --- a/services/Web/hostingEnvironments/alerts.yaml +++ b/services/Web/hostingEnvironments/alerts.yaml @@ -16,6 +16,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 75.0 + autoMitigate: false - name: HttpQueueLength description: The average number of HTTP requests that had to sit on the queue before being fulfilled. A high or increasing HTTP Queue length is a symptom of a plan @@ -36,3 +37,4 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 100.0 + autoMitigate: false diff --git a/services/Web/serverFarms/alerts.yaml b/services/Web/serverFarms/alerts.yaml index 43fee86b0..8de124c81 100644 --- a/services/Web/serverFarms/alerts.yaml +++ b/services/Web/serverFarms/alerts.yaml @@ -16,6 +16,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 90.0 + autoMitigate: false - name: MemoryPercentage description: The average memory used across all instances of the plan. type: Metric @@ -34,6 +35,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 90.0 + autoMitigate: false - name: HttpQueueLength description: The average number of HTTP requests that had to sit on the queue before being fulfilled. 
A high or increasing HTTP Queue length is a symptom of a plan @@ -54,6 +56,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 100.0 + autoMitigate: false - name: DiskQueueLength description: The average number of both read and write requests that were queued on storage. A high disk queue length is an indication of an app that might be @@ -74,6 +77,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 100.0 + autoMitigate: false - name: SocketOutboundAll description: The average number of sockets used for outbound connections across all the instances of the plan irrespective of their TCP states. Having too many @@ -94,6 +98,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 2000.0 + autoMitigate: false - name: TcpTimeWait description: The average number of sockets in TIME_WAIT state across all the instances of the plan. @@ -113,6 +118,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 300.0 + autoMitigate: false - name: BytesReceived description: The average incoming bandwidth used across all instances of the plan. type: Metric @@ -134,6 +140,7 @@ numberOfEvaluationPeriods: 4 minFailingPeroidsToAlert: minFailingPeroidsToAlert alertSensitivity: Low + autoMitigate: false - name: BytesSent description: The average outgoing bandwidth used across all instances of the plan. type: Metric @@ -155,6 +162,7 @@ numberOfEvaluationPeriods: 4 minFailingPeroidsToAlert: minFailingPeroidsToAlert alertSensitivity: Low + autoMitigate: false - name: TcpEstablished description: The average number of sockets in ESTABLISHED state across all the instances of the plan. 
@@ -174,3 +182,4 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 3000.0 + autoMitigate: false diff --git a/services/Web/sites/alerts.yaml b/services/Web/sites/alerts.yaml index 565570eee..71702ae1c 100644 --- a/services/Web/sites/alerts.yaml +++ b/services/Web/sites/alerts.yaml @@ -17,6 +17,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 30.0 + autoMitigate: false - name: CpuTime description: The amount of CPU consumed by the app, in seconds. For more information about this metric. Please see https://aka.ms/website-monitor-cpu-time-vs-cpu-percentage @@ -37,6 +38,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 120.0 + autoMitigate: false - name: PrivateBytes description: Private Bytes is the current size, in bytes, of memory that the app process has allocated that can't be shared with other processes. For WebApps and @@ -57,6 +59,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 1200000000.0 + autoMitigate: false - name: RequestsInApplicationQueue description: The number of requests in the application request queue. For WebApps and FunctionApps. @@ -81,6 +84,7 @@ values: - '*' threshold: 10.0 + autoMitigate: false - name: AppConnections description: The number of bound sockets existing in the sandbox (w3wp.exe and its child processes). A bound socket is created by calling bind()/connect() APIs and @@ -107,6 +111,7 @@ values: - '*' threshold: 6000.0 + autoMitigate: false - name: Http401 description: The count of requests resulting in HTTP 401 status code. For WebApps and FunctionApps. @@ -126,6 +131,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 20.0 + autoMitigate: false - name: Http404 description: The count of requests resulting in HTTP 404 status code. For WebApps and FunctionApps. 
@@ -145,6 +151,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 10.0 + autoMitigate: false - name: FileSystemUsage description: Percentage of filesystem quota consumed by the app. For WebApps and FunctionApps. @@ -164,6 +171,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 400000000.0 + autoMitigate: false - name: MemoryWorkingSet description: The current amount of memory used by the app, in MiB. For WebApps and FunctionApps. @@ -183,6 +191,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 1500000000.0 + autoMitigate: false - name: FunctionExecutionCount description: Function Execution Count. For FunctionApps only. type: Metric @@ -201,6 +210,7 @@ operator: LessThanOrEqual criterionType: StaticThresholdCriterion threshold: 0.0 + autoMitigate: false - name: Threads description: The number of threads currently active in the app process. For WebApps and FunctionApps. @@ -220,6 +230,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 100.0 + autoMitigate: false - name: BytesSent description: The amount of outgoing bandwidth consumed by the app, in MiB. For WebApps and FunctionApps. @@ -242,6 +253,7 @@ numberOfEvaluationPeriods: 4 minFailingPeroidsToAlert: minFailingPeroidsToAlert alertSensitivity: Low + autoMitigate: false - name: Http406 description: The count of requests resulting in HTTP 406 status code. For WebApps and FunctionApps. @@ -261,6 +273,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 1.0 + autoMitigate: false - name: BytesReceived description: The amount of incoming bandwidth consumed by the app, in MiB. For WebApps and FunctionApps. @@ -280,6 +293,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 2048000000.0 + autoMitigate: false - name: Http3xx description: The count of requests resulting in an HTTP status code >= 300 but < 400. For WebApps and FunctionApps. 
@@ -299,6 +313,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 15.0 + autoMitigate: false - name: Handles description: The total number of handles currently open by the app process. For WebApps and FunctionApps. @@ -321,6 +336,7 @@ numberOfEvaluationPeriods: 4 minFailingPeroidsToAlert: minFailingPeroidsToAlert alertSensitivity: Low + autoMitigate: false - name: FunctionExecutionUnits description: Function Execution Units. For FunctionApps only. type: Metric @@ -339,6 +355,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 13000000000.0 + autoMitigate: false - name: Http2xx description: The count of requests resulting in an HTTP status code >= 200 but < 300. For WebApps and FunctionApps. @@ -358,6 +375,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 15.0 + autoMitigate: false - name: WorkflowRunsFailureRate description: Workflow Runs Failure Rate. For LogicApps only. type: Metric @@ -376,6 +394,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 0.0 + autoMitigate: false - name: Gen2Collections description: The number of times the generation 2 objects are garbage collected since the start of the app process. For WebApps and FunctionApps. @@ -398,6 +417,7 @@ numberOfEvaluationPeriods: 4 minFailingPeroidsToAlert: minFailingPeroidsToAlert alertSensitivity: Medium + autoMitigate: false - name: Gen0Collections description: The number of times the generation 0 objects are garbage collected since the start of the app process. Higher generation GCs include all lower generation @@ -421,6 +441,7 @@ numberOfEvaluationPeriods: 4 minFailingPeroidsToAlert: minFailingPeroidsToAlert alertSensitivity: Medium + autoMitigate: false - name: Gen1Collections description: The number of times the generation 1 objects are garbage collected since the start of the app process. 
Higher generation GCs include all lower generation @@ -444,6 +465,7 @@ numberOfEvaluationPeriods: 4 minFailingPeroidsToAlert: minFailingPeroidsToAlert alertSensitivity: Medium + autoMitigate: false - name: Http5xx description: The count of requests resulting in an HTTP status code >= 500 but < 600. @@ -463,6 +485,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 10.0 + autoMitigate: false - name: HttpResponseTime description: The time taken for the app to serve requests, in seconds. type: Metric @@ -481,6 +504,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 5.0 + autoMitigate: false - name: Http4xx description: The count of requests resulting in an HTTP status code >= 400 but < 500. @@ -500,6 +524,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 5.0 + autoMitigate: false - name: AverageMemoryWorkingSet description: The average amount of memory used by the app, in megabytes (MiB). type: Metric @@ -518,6 +543,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 800000000.0 + autoMitigate: false - name: Requests description: The total number of requests regardless of their resulting HTTP status code. @@ -537,6 +563,7 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 2000.0 + autoMitigate: false - name: HealthCheckStatus description: Health check status type: Metric @@ -555,6 +582,7 @@ operator: LessThan criterionType: StaticThresholdCriterion threshold: 100.0 + autoMitigate: false - name: Http403 description: The count of requests resulting in HTTP 403 status code. type: Metric @@ -573,3 +601,4 @@ operator: GreaterThan criterionType: StaticThresholdCriterion threshold: 5.0 + autoMitigate: false