From f4613e858d3e2477d3d4f7630a086917fc2c956f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 18 Oct 2024 13:22:19 +0000 Subject: [PATCH] [GitHub Action - Generate Templates] Generate templates for alerts --- ..._bb83ae53-3b91-49ff-a351-9545ea747335.json | 199 ++++++++++++++ ..._282cb835-592b-4e49-89c1-ea1aaefb37c4.json | 199 ++++++++++++++ ..._a2f5e5a3-ec38-4201-970f-971ad4707b93.json | 199 ++++++++++++++ ..._e840c7a9-e070-4dc9-919b-c75a3679f6b7.json | 199 ++++++++++++++ ..._8bafb3d5-d961-49a4-9867-f1cddfd703e0.json | 199 ++++++++++++++ ...bb83ae53-3b91-49ff-a351-9545ea747335.bicep | 140 ++++++++++ ...282cb835-592b-4e49-89c1-ea1aaefb37c4.bicep | 140 ++++++++++ ...a2f5e5a3-ec38-4201-970f-971ad4707b93.bicep | 140 ++++++++++ ...e840c7a9-e070-4dc9-919b-c75a3679f6b7.bicep | 140 ++++++++++ ...8bafb3d5-d961-49a4-9867-f1cddfd703e0.bicep | 140 ++++++++++ ..._0337a76f-238e-4d4d-9cd1-48b205874dbb.json | 199 ++++++++++++++ ..._be3f1bfc-c21a-4399-9b9f-a33ebdc470cb.json | 199 ++++++++++++++ ..._a171bc0c-676f-464b-a7b5-e50cd6c612a2.json | 199 ++++++++++++++ ...0337a76f-238e-4d4d-9cd1-48b205874dbb.bicep | 135 +++++++++ ...be3f1bfc-c21a-4399-9b9f-a33ebdc470cb.bicep | 135 +++++++++ ...a171bc0c-676f-464b-a7b5-e50cd6c612a2.bicep | 135 +++++++++ ..._5611eb31-51c7-4279-ab8c-97dba0a2c044.json | 95 +++++++ ...5611eb31-51c7-4279-ab8c-97dba0a2c044.bicep | 68 +++++ ..._ebd68fdd-9672-43e8-b7d5-6e479210535d.json | 256 ++++++++++++++++++ ..._ecec6f93-af7e-4071-b35d-cd70b3f16581.json | 256 ++++++++++++++++++ ..._2feba8fd-ff1e-4f48-bc01-6e2996edafa6.json | 256 ++++++++++++++++++ ..._48fc094d-8a00-4d3c-86d3-3230c7e5881a.json | 256 ++++++++++++++++++ ..._4eeca790-a804-4453-b339-73ea425610bc.json | 256 ++++++++++++++++++ ..._59298086-ec77-4f47-b2ef-b853b79e31cb.json | 256 ++++++++++++++++++ ..._8f231351-c123-4e4c-8631-9978e641a3ca.json | 256 ++++++++++++++++++ ..._9d086772-1887-4893-8b9f-7e5169398bae.json | 256 ++++++++++++++++++ ..._7f951991-c6ce-4c72-9f55-7eade2c4f57c.json | 199 ++++++++++++++ ...ebd68fdd-9672-43e8-b7d5-6e479210535d.bicep | 183 +++++++++++++ ...ecec6f93-af7e-4071-b35d-cd70b3f16581.bicep | 183 +++++++++++++ ...2feba8fd-ff1e-4f48-bc01-6e2996edafa6.bicep | 183 +++++++++++++ ...48fc094d-8a00-4d3c-86d3-3230c7e5881a.bicep | 183 +++++++++++++ ...4eeca790-a804-4453-b339-73ea425610bc.bicep | 183 +++++++++++++ ...59298086-ec77-4f47-b2ef-b853b79e31cb.bicep | 183 +++++++++++++ ...8f231351-c123-4e4c-8631-9978e641a3ca.bicep | 183 +++++++++++++ ...9d086772-1887-4893-8b9f-7e5169398bae.bicep | 183 +++++++++++++ ...7f951991-c6ce-4c72-9f55-7eade2c4f57c.bicep | 135 +++++++++ 36 files changed, 6706 insertions(+) create mode 100644 services/AVS/privateClouds/templates/arm/CPUCritical_bb83ae53-3b91-49ff-a351-9545ea747335.json create mode 100644 services/AVS/privateClouds/templates/arm/CPU_282cb835-592b-4e49-89c1-ea1aaefb37c4.json create mode 100644 services/AVS/privateClouds/templates/arm/MemoryCritical_a2f5e5a3-ec38-4201-970f-971ad4707b93.json create mode 100644 services/AVS/privateClouds/templates/arm/Memory_e840c7a9-e070-4dc9-919b-c75a3679f6b7.json create mode 100644 services/AVS/privateClouds/templates/arm/Storage_8bafb3d5-d961-49a4-9867-f1cddfd703e0.json create mode 100644 services/AVS/privateClouds/templates/bicep/CPUCritical_bb83ae53-3b91-49ff-a351-9545ea747335.bicep create mode 100644 services/AVS/privateClouds/templates/bicep/CPU_282cb835-592b-4e49-89c1-ea1aaefb37c4.bicep create mode 100644 services/AVS/privateClouds/templates/bicep/MemoryCritical_a2f5e5a3-ec38-4201-970f-971ad4707b93.bicep create mode 100644 services/AVS/privateClouds/templates/bicep/Memory_e840c7a9-e070-4dc9-919b-c75a3679f6b7.bicep create mode 100644 services/AVS/privateClouds/templates/bicep/Storage_8bafb3d5-d961-49a4-9867-f1cddfd703e0.bicep create mode 100644 services/MachineLearningServices/workspaces/templates/arm/ModelDeployFailed_0337a76f-238e-4d4d-9cd1-48b205874dbb.json create mode 100644 services/MachineLearningServices/workspaces/templates/arm/QuotaUtilizationPercentage_be3f1bfc-c21a-4399-9b9f-a33ebdc470cb.json create mode 100644 services/MachineLearningServices/workspaces/templates/arm/UnusableNodes_a171bc0c-676f-464b-a7b5-e50cd6c612a2.json create mode 100644 services/MachineLearningServices/workspaces/templates/bicep/ModelDeployFailed_0337a76f-238e-4d4d-9cd1-48b205874dbb.bicep create mode 100644 services/MachineLearningServices/workspaces/templates/bicep/QuotaUtilizationPercentage_be3f1bfc-c21a-4399-9b9f-a33ebdc470cb.bicep create mode 100644 services/MachineLearningServices/workspaces/templates/bicep/UnusableNodes_a171bc0c-676f-464b-a7b5-e50cd6c612a2.bicep create mode 100644 services/Search/searchServices/templates/arm/ActivityLogSearchServiceDelete_5611eb31-51c7-4279-ab8c-97dba0a2c044.json create mode 100644 services/Search/searchServices/templates/bicep/ActivityLogSearchServiceDelete_5611eb31-51c7-4279-ab8c-97dba0a2c044.bicep create mode 100644 services/StorageCache/AmlFilesystems/templates/arm/MDTBytesUsed_ebd68fdd-9672-43e8-b7d5-6e479210535d.json create mode 100644 services/StorageCache/AmlFilesystems/templates/arm/MDTFilesAvailable_ecec6f93-af7e-4071-b35d-cd70b3f16581.json create mode 100644 services/StorageCache/AmlFilesystems/templates/arm/MDTFilesFree_2feba8fd-ff1e-4f48-bc01-6e2996edafa6.json create mode 100644 services/StorageCache/AmlFilesystems/templates/arm/MDTFilesUsed_48fc094d-8a00-4d3c-86d3-3230c7e5881a.json create mode 100644 services/StorageCache/AmlFilesystems/templates/arm/OSTBytesAvailable_4eeca790-a804-4453-b339-73ea425610bc.json create mode 100644 services/StorageCache/AmlFilesystems/templates/arm/OSTBytesUsed_59298086-ec77-4f47-b2ef-b853b79e31cb.json create mode 100644 services/StorageCache/AmlFilesystems/templates/arm/OSTFilesFree_8f231351-c123-4e4c-8631-9978e641a3ca.json create mode 100644 services/StorageCache/AmlFilesystems/templates/arm/OSTFilesUsed_9d086772-1887-4893-8b9f-7e5169398bae.json create mode 100644 services/StorageCache/AmlFilesystems/templates/arm/Uptime_7f951991-c6ce-4c72-9f55-7eade2c4f57c.json create mode 100644 services/StorageCache/AmlFilesystems/templates/bicep/MDTBytesUsed_ebd68fdd-9672-43e8-b7d5-6e479210535d.bicep create mode 100644 services/StorageCache/AmlFilesystems/templates/bicep/MDTFilesAvailable_ecec6f93-af7e-4071-b35d-cd70b3f16581.bicep create mode 100644 services/StorageCache/AmlFilesystems/templates/bicep/MDTFilesFree_2feba8fd-ff1e-4f48-bc01-6e2996edafa6.bicep create mode 100644 services/StorageCache/AmlFilesystems/templates/bicep/MDTFilesUsed_48fc094d-8a00-4d3c-86d3-3230c7e5881a.bicep create mode 100644 services/StorageCache/AmlFilesystems/templates/bicep/OSTBytesAvailable_4eeca790-a804-4453-b339-73ea425610bc.bicep create mode 100644 services/StorageCache/AmlFilesystems/templates/bicep/OSTBytesUsed_59298086-ec77-4f47-b2ef-b853b79e31cb.bicep create mode 100644 services/StorageCache/AmlFilesystems/templates/bicep/OSTFilesFree_8f231351-c123-4e4c-8631-9978e641a3ca.bicep create mode 100644 services/StorageCache/AmlFilesystems/templates/bicep/OSTFilesUsed_9d086772-1887-4893-8b9f-7e5169398bae.bicep create mode 100644 services/StorageCache/AmlFilesystems/templates/bicep/Uptime_7f951991-c6ce-4c72-9f55-7eade2c4f57c.bicep diff --git a/services/AVS/privateClouds/templates/arm/CPUCritical_bb83ae53-3b91-49ff-a351-9545ea747335.json b/services/AVS/privateClouds/templates/arm/CPUCritical_bb83ae53-3b91-49ff-a351-9545ea747335.json new file mode 100644 index 000000000..10fe083d3 --- /dev/null +++ b/services/AVS/privateClouds/templates/arm/CPUCritical_bb83ae53-3b91-49ff-a351-9545ea747335.json @@ -0,0 +1,199 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "CPU Usage per Cluster (Critical)", + "metadata": { + "description": "Description of alert" + } + }, + "targetResourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name" + } + }, + "targetResourceRegion": { + "type": "string", + "metadata": { + "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS" + } + }, + "targetResourceType": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Resource type of target resources to be monitored." + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 0, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "operator": { + "type": "string", + "defaultValue": "GreaterThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "95", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT30M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "how often the metric alert is evaluated represented in ISO 8601 duration format" + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry." + } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]", + "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]" + }, + "resources": [ + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": "[parameters('alertName')]", + "location": "global", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "scopes": "[variables('varTargetResourceId')]", + "targetResourceType": "[parameters('targetResourceType')]", + "targetResourceRegion": "[parameters('targetResourceRegion')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria", + "allOf": [ + { + "name": "1st criterion", + "metricName": "EffectiveCpuAverage", + "dimensions": [{"operator": "include", "name": "clustername", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "criterionType": "StaticThresholdCriterion" + } + ] + } + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2020-06-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/AVS/privateClouds/templates/arm/CPU_282cb835-592b-4e49-89c1-ea1aaefb37c4.json b/services/AVS/privateClouds/templates/arm/CPU_282cb835-592b-4e49-89c1-ea1aaefb37c4.json new file mode 100644 index 000000000..a3252a726 --- /dev/null +++ b/services/AVS/privateClouds/templates/arm/CPU_282cb835-592b-4e49-89c1-ea1aaefb37c4.json @@ -0,0 +1,199 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "CPU Usage per Cluster", + "metadata": { + "description": "Description of alert" + } + }, + "targetResourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name" + } + }, + "targetResourceRegion": { + "type": "string", + "metadata": { + "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS" + } + }, + "targetResourceType": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Resource type of target resources to be monitored." + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "operator": { + "type": "string", + "defaultValue": "GreaterThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "80", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT30M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "how often the metric alert is evaluated represented in ISO 8601 duration format" + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry." + } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]", + "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]" + }, + "resources": [ + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": "[parameters('alertName')]", + "location": "global", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "scopes": "[variables('varTargetResourceId')]", + "targetResourceType": "[parameters('targetResourceType')]", + "targetResourceRegion": "[parameters('targetResourceRegion')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria", + "allOf": [ + { + "name": "1st criterion", + "metricName": "EffectiveCpuAverage", + "dimensions": [{"operator": "include", "name": "clustername", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "criterionType": "StaticThresholdCriterion" + } + ] + } + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2020-06-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/AVS/privateClouds/templates/arm/MemoryCritical_a2f5e5a3-ec38-4201-970f-971ad4707b93.json b/services/AVS/privateClouds/templates/arm/MemoryCritical_a2f5e5a3-ec38-4201-970f-971ad4707b93.json new file mode 100644 index 000000000..075c7ee72 --- /dev/null +++ b/services/AVS/privateClouds/templates/arm/MemoryCritical_a2f5e5a3-ec38-4201-970f-971ad4707b93.json @@ -0,0 +1,199 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Memory Usage per Cluster (Critical)", + "metadata": { + "description": "Description of alert" + } + }, + "targetResourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name" + } + }, + "targetResourceRegion": { + "type": "string", + "metadata": { + "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS" + } + }, + "targetResourceType": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Resource type of target resources to be monitored." + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 0, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "operator": { + "type": "string", + "defaultValue": "GreaterThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "95", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT30M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "how often the metric alert is evaluated represented in ISO 8601 duration format" + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry." + } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]", + "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]" + }, + "resources": [ + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": "[parameters('alertName')]", + "location": "global", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "scopes": "[variables('varTargetResourceId')]", + "targetResourceType": "[parameters('targetResourceType')]", + "targetResourceRegion": "[parameters('targetResourceRegion')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria", + "allOf": [ + { + "name": "1st criterion", + "metricName": "UsageAverage", + "dimensions": [{"operator": "include", "name": "clustername", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "criterionType": "StaticThresholdCriterion" + } + ] + } + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2020-06-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/AVS/privateClouds/templates/arm/Memory_e840c7a9-e070-4dc9-919b-c75a3679f6b7.json b/services/AVS/privateClouds/templates/arm/Memory_e840c7a9-e070-4dc9-919b-c75a3679f6b7.json new file mode 100644 index 000000000..9fdf26bbc --- /dev/null +++ b/services/AVS/privateClouds/templates/arm/Memory_e840c7a9-e070-4dc9-919b-c75a3679f6b7.json @@ -0,0 +1,199 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Memory Usage per Cluster", + "metadata": { + "description": "Description of alert" + } + }, + "targetResourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name" + } + }, + "targetResourceRegion": { + "type": "string", + "metadata": { + "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS" + } + }, + "targetResourceType": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Resource type of target resources to be monitored." + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "operator": { + "type": "string", + "defaultValue": "GreaterThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "80", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT30M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "how often the metric alert is evaluated represented in ISO 8601 duration format" + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry." + } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]", + "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]" + }, + "resources": [ + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": "[parameters('alertName')]", + "location": "global", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "scopes": "[variables('varTargetResourceId')]", + "targetResourceType": "[parameters('targetResourceType')]", + "targetResourceRegion": "[parameters('targetResourceRegion')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria", + "allOf": [ + { + "name": "1st criterion", + "metricName": "UsageAverage", + "dimensions": [{"operator": "include", "name": "clustername", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "criterionType": "StaticThresholdCriterion" + } + ] + } + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2020-06-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/AVS/privateClouds/templates/arm/Storage_8bafb3d5-d961-49a4-9867-f1cddfd703e0.json b/services/AVS/privateClouds/templates/arm/Storage_8bafb3d5-d961-49a4-9867-f1cddfd703e0.json new file mode 100644 index 000000000..a6bc42e24 --- /dev/null +++ b/services/AVS/privateClouds/templates/arm/Storage_8bafb3d5-d961-49a4-9867-f1cddfd703e0.json @@ -0,0 +1,199 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Storage Usage per Datastore", + "metadata": { + "description": "Description of alert" + } + }, + "targetResourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name" + } + }, + "targetResourceRegion": { + "type": "string", + "metadata": { + "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS" + } + }, + "targetResourceType": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Resource type of target resources to be monitored." + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "operator": { + "type": "string", + "defaultValue": "GreaterThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "70", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT30M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "how often the metric alert is evaluated represented in ISO 8601 duration format" + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry." + } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]", + "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]" + }, + "resources": [ + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": "[parameters('alertName')]", + "location": "global", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "scopes": "[variables('varTargetResourceId')]", + "targetResourceType": "[parameters('targetResourceType')]", + "targetResourceRegion": "[parameters('targetResourceRegion')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria", + "allOf": [ + { + "name": "1st criterion", + "metricName": "DiskUsedPercentage", + "dimensions": [{"operator": "include", "name": "dsname", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "criterionType": "StaticThresholdCriterion" + } + ] + } + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2020-06-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/AVS/privateClouds/templates/bicep/CPUCritical_bb83ae53-3b91-49ff-a351-9545ea747335.bicep b/services/AVS/privateClouds/templates/bicep/CPUCritical_bb83ae53-3b91-49ff-a351-9545ea747335.bicep new file mode 100644 index 000000000..0fccc44b2 --- /dev/null +++ b/services/AVS/privateClouds/templates/bicep/CPUCritical_bb83ae53-3b91-49ff-a351-9545ea747335.bicep @@ -0,0 +1,140 @@ +@description('Name of the alert') +@minLength(1) +param alertName string + +@description('Description of alert') +param alertDescription string = 'CPU Usage per Cluster (Critical)' + +@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name') +@minLength(1) +param targetResourceId array + +@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS') +param targetResourceRegion string + +@description('Resource type of target resources to be monitored.') +@minLength(1) +param targetResourceType string + +@description('Specifies whether the alert is enabled') +param isEnabled bool = true + +@description('Severity of alert {0,1,2,3,4}') +@allowed([ + 0 + 1 + 2 + 3 + 4 +]) +param alertSeverity int = 0 + +@description('Operator comparing the current value with the threshold value.') +@allowed([ + 'Equals' + 'GreaterThan' + 'GreaterThanOrEqual' + 'LessThan' + 'LessThanOrEqual' +]) +param operator string = 'GreaterThan' + +@description('The threshold value at which the alert is activated.') +param threshold int = 95 + +@description('How the data that is collected should be combined over time.') +@allowed([ + 'Average' + 'Minimum' + 'Maximum' + 'Total' + 'Count' +]) +param timeAggregation string = 'Average' + +@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' + 'P1D' +]) +param windowSize string = 'PT30M' + +@description('how often the metric alert is evaluated represented in ISO 8601 duration format') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' +]) +param evaluationFrequency string = 'PT5M' + +@description('"The current date and time using the utcNow function. Used for deployment name uniqueness') +param currentDateTimeUtcNow string = utcNow() + +@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.') +@allowed([ + 'Yes' + 'No' +]) +param telemetryOptOut string = 'No' + +resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = { + name: alertName + location: 'global' + tags: { + _deployed_by_amba: 'true' + } + properties: { + description: alertDescription + scopes: targetResourceId + targetResourceType: targetResourceType + targetResourceRegion: targetResourceRegion + severity: alertSeverity + enabled: isEnabled + evaluationFrequency: evaluationFrequency + windowSize: windowSize + criteria: { + 'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria' + allOf: [ + { + name: '1st criterion' + metricName: 'EffectiveCpuAverage' + dimensions: [ + { + name: 'clustername' + operator: 'include' + values: ['*'] + }] + operator: operator + threshold: threshold + timeAggregation: timeAggregation + criterionType: 'StaticThresholdCriterion' + } + ] + } + } +} + +var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' +resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') { + name: ambaTelemetryPidName + tags: { + _deployed_by_amba: 'true' + } + properties: { + mode: 'Incremental' + template: { + '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#' + contentVersion: '1.0.0.0' + resources: [] + } + } +} diff --git a/services/AVS/privateClouds/templates/bicep/CPU_282cb835-592b-4e49-89c1-ea1aaefb37c4.bicep b/services/AVS/privateClouds/templates/bicep/CPU_282cb835-592b-4e49-89c1-ea1aaefb37c4.bicep new file mode 100644 index 000000000..353e29373 --- /dev/null +++ b/services/AVS/privateClouds/templates/bicep/CPU_282cb835-592b-4e49-89c1-ea1aaefb37c4.bicep @@ -0,0 +1,140 @@ +@description('Name of the alert') +@minLength(1) +param alertName string + +@description('Description of alert') +param alertDescription string = 'CPU Usage per Cluster' + +@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name') +@minLength(1) +param targetResourceId array + +@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS') +param targetResourceRegion string + +@description('Resource type of target resources to be monitored.') +@minLength(1) +param targetResourceType string + +@description('Specifies whether the alert is enabled') +param isEnabled bool = true + +@description('Severity of alert {0,1,2,3,4}') +@allowed([ + 0 + 1 + 2 + 3 + 4 +]) +param alertSeverity int = 2 + +@description('Operator comparing the current value with the threshold value.') +@allowed([ + 'Equals' + 'GreaterThan' + 'GreaterThanOrEqual' + 'LessThan' + 'LessThanOrEqual' +]) +param operator string = 'GreaterThan' + +@description('The threshold value at which the alert is activated.') +param threshold int = 80 + +@description('How the data that is collected should be combined over time.') +@allowed([ + 'Average' + 'Minimum' + 'Maximum' + 'Total' + 'Count' +]) +param timeAggregation string = 'Average' + +@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' + 'P1D' +]) +param windowSize string = 'PT30M' + +@description('how often the metric alert is evaluated represented in ISO 8601 duration format') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' +]) +param evaluationFrequency string = 'PT5M' + +@description('"The current date and time using the utcNow function. Used for deployment name uniqueness') +param currentDateTimeUtcNow string = utcNow() + +@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.') +@allowed([ + 'Yes' + 'No' +]) +param telemetryOptOut string = 'No' + +resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = { + name: alertName + location: 'global' + tags: { + _deployed_by_amba: 'true' + } + properties: { + description: alertDescription + scopes: targetResourceId + targetResourceType: targetResourceType + targetResourceRegion: targetResourceRegion + severity: alertSeverity + enabled: isEnabled + evaluationFrequency: evaluationFrequency + windowSize: windowSize + criteria: { + 'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria' + allOf: [ + { + name: '1st criterion' + metricName: 'EffectiveCpuAverage' + dimensions: [ + { + name: 'clustername' + operator: 'include' + values: ['*'] + }] + operator: operator + threshold: threshold + timeAggregation: timeAggregation + criterionType: 'StaticThresholdCriterion' + } + ] + } + } +} + +var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' +resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') { + name: ambaTelemetryPidName + tags: { + _deployed_by_amba: 'true' + } + properties: { + mode: 'Incremental' + template: { + '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#' + contentVersion: '1.0.0.0' + resources: [] + } + } +} diff --git a/services/AVS/privateClouds/templates/bicep/MemoryCritical_a2f5e5a3-ec38-4201-970f-971ad4707b93.bicep b/services/AVS/privateClouds/templates/bicep/MemoryCritical_a2f5e5a3-ec38-4201-970f-971ad4707b93.bicep new file mode 100644 index 000000000..4c970b8b6 --- /dev/null +++ b/services/AVS/privateClouds/templates/bicep/MemoryCritical_a2f5e5a3-ec38-4201-970f-971ad4707b93.bicep @@ -0,0 +1,140 @@ +@description('Name of the alert') +@minLength(1) +param alertName string + +@description('Description of alert') +param alertDescription string = 'Memory Usage per Cluster (Critical)' + +@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name') +@minLength(1) +param targetResourceId array + +@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS') +param targetResourceRegion string + +@description('Resource type of target resources to be monitored.') +@minLength(1) +param targetResourceType string + +@description('Specifies whether the alert is enabled') +param isEnabled bool = true + +@description('Severity of alert {0,1,2,3,4}') +@allowed([ + 0 + 1 + 2 + 3 + 4 +]) +param alertSeverity int = 0 + +@description('Operator comparing the current value with the threshold value.') +@allowed([ + 'Equals' + 'GreaterThan' + 'GreaterThanOrEqual' + 'LessThan' + 'LessThanOrEqual' +]) +param operator string = 'GreaterThan' + +@description('The threshold value at which the alert is activated.') +param threshold int = 95 + +@description('How the data that is collected should be combined over time.') +@allowed([ + 'Average' + 'Minimum' + 'Maximum' + 'Total' + 'Count' +]) +param timeAggregation string = 'Average' + +@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' + 'P1D' +]) +param windowSize string = 'PT30M' + +@description('how often the metric alert is evaluated represented in ISO 8601 duration format') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' +]) +param evaluationFrequency string = 'PT5M' + +@description('"The current date and time using the utcNow function. Used for deployment name uniqueness') +param currentDateTimeUtcNow string = utcNow() + +@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.') +@allowed([ + 'Yes' + 'No' +]) +param telemetryOptOut string = 'No' + +resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = { + name: alertName + location: 'global' + tags: { + _deployed_by_amba: 'true' + } + properties: { + description: alertDescription + scopes: targetResourceId + targetResourceType: targetResourceType + targetResourceRegion: targetResourceRegion + severity: alertSeverity + enabled: isEnabled + evaluationFrequency: evaluationFrequency + windowSize: windowSize + criteria: { + 'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria' + allOf: [ + { + name: '1st criterion' + metricName: 'UsageAverage' + dimensions: [ + { + name: 'clustername' + operator: 'include' + values: ['*'] + }] + operator: operator + threshold: threshold + timeAggregation: timeAggregation + criterionType: 'StaticThresholdCriterion' + } + ] + } + } +} + +var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' +resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') { + name: ambaTelemetryPidName + tags: { + _deployed_by_amba: 'true' + } + properties: { + mode: 'Incremental' + template: { + '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#' + contentVersion: '1.0.0.0' + resources: [] + } + } +} diff --git a/services/AVS/privateClouds/templates/bicep/Memory_e840c7a9-e070-4dc9-919b-c75a3679f6b7.bicep b/services/AVS/privateClouds/templates/bicep/Memory_e840c7a9-e070-4dc9-919b-c75a3679f6b7.bicep new file mode 100644 index 000000000..55c85d4bf --- /dev/null +++ b/services/AVS/privateClouds/templates/bicep/Memory_e840c7a9-e070-4dc9-919b-c75a3679f6b7.bicep @@ -0,0 +1,140 @@ +@description('Name of the alert') +@minLength(1) +param alertName string + +@description('Description of alert') +param alertDescription string = 'Memory Usage per Cluster' + +@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name') +@minLength(1) +param targetResourceId array + +@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS') +param targetResourceRegion string + +@description('Resource type of target resources to be monitored.') +@minLength(1) +param targetResourceType string + +@description('Specifies whether the alert is enabled') +param isEnabled bool = true + +@description('Severity of alert {0,1,2,3,4}') +@allowed([ + 0 + 1 + 2 + 3 + 4 +]) +param alertSeverity int = 2 + +@description('Operator comparing the current value with the threshold value.') +@allowed([ + 'Equals' + 'GreaterThan' + 'GreaterThanOrEqual' + 'LessThan' + 'LessThanOrEqual' +]) +param operator string = 'GreaterThan' + +@description('The threshold value at which the alert is activated.') +param threshold int = 80 + +@description('How the data that is collected should be combined over time.') +@allowed([ + 'Average' + 'Minimum' + 'Maximum' + 'Total' + 'Count' +]) +param timeAggregation string = 'Average' + +@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' + 'P1D' +]) +param windowSize string = 'PT30M' + +@description('how often the metric alert is evaluated represented in ISO 8601 duration format') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' +]) +param evaluationFrequency string = 'PT5M' + +@description('"The current date and time using the utcNow function. Used for deployment name uniqueness') +param currentDateTimeUtcNow string = utcNow() + +@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.') +@allowed([ + 'Yes' + 'No' +]) +param telemetryOptOut string = 'No' + +resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = { + name: alertName + location: 'global' + tags: { + _deployed_by_amba: 'true' + } + properties: { + description: alertDescription + scopes: targetResourceId + targetResourceType: targetResourceType + targetResourceRegion: targetResourceRegion + severity: alertSeverity + enabled: isEnabled + evaluationFrequency: evaluationFrequency + windowSize: windowSize + criteria: { + 'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria' + allOf: [ + { + name: '1st criterion' + metricName: 'UsageAverage' + dimensions: [ + { + name: 'clustername' + operator: 'include' + values: ['*'] + }] + operator: operator + threshold: threshold + timeAggregation: timeAggregation + criterionType: 'StaticThresholdCriterion' + } + ] + } + } +} + +var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' +resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') { + name: ambaTelemetryPidName + tags: { + _deployed_by_amba: 'true' + } + properties: { + mode: 'Incremental' + template: { + '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#' + contentVersion: '1.0.0.0' + resources: [] + } + } +} diff --git a/services/AVS/privateClouds/templates/bicep/Storage_8bafb3d5-d961-49a4-9867-f1cddfd703e0.bicep b/services/AVS/privateClouds/templates/bicep/Storage_8bafb3d5-d961-49a4-9867-f1cddfd703e0.bicep new file mode 100644 index 000000000..d8a5e9640 --- /dev/null +++ b/services/AVS/privateClouds/templates/bicep/Storage_8bafb3d5-d961-49a4-9867-f1cddfd703e0.bicep @@ -0,0 +1,140 @@ +@description('Name of the alert') +@minLength(1) +param alertName string + +@description('Description of alert') +param alertDescription string = 'Storage Usage per Datastore' + +@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name') +@minLength(1) +param targetResourceId array + +@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS') +param targetResourceRegion string + +@description('Resource type of target resources to be monitored.') +@minLength(1) +param targetResourceType string + +@description('Specifies whether the alert is enabled') +param isEnabled bool = true + +@description('Severity of alert {0,1,2,3,4}') +@allowed([ + 0 + 1 + 2 + 3 + 4 +]) +param alertSeverity int = 2 + +@description('Operator comparing the current value with the threshold value.') +@allowed([ + 'Equals' + 'GreaterThan' + 'GreaterThanOrEqual' + 'LessThan' + 'LessThanOrEqual' +]) +param operator string = 'GreaterThan' + +@description('The threshold value at which the alert is activated.') +param threshold int = 70 + +@description('How the data that is collected should be combined over time.') +@allowed([ + 'Average' + 'Minimum' + 'Maximum' + 'Total' + 'Count' +]) +param timeAggregation string = 'Average' + +@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' + 'P1D' +]) +param windowSize string = 'PT30M' + +@description('how often the metric alert is evaluated represented in ISO 8601 duration format') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' +]) +param evaluationFrequency string = 'PT5M' + +@description('"The current date and time using the utcNow function. Used for deployment name uniqueness') +param currentDateTimeUtcNow string = utcNow() + +@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.') +@allowed([ + 'Yes' + 'No' +]) +param telemetryOptOut string = 'No' + +resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = { + name: alertName + location: 'global' + tags: { + _deployed_by_amba: 'true' + } + properties: { + description: alertDescription + scopes: targetResourceId + targetResourceType: targetResourceType + targetResourceRegion: targetResourceRegion + severity: alertSeverity + enabled: isEnabled + evaluationFrequency: evaluationFrequency + windowSize: windowSize + criteria: { + 'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria' + allOf: [ + { + name: '1st criterion' + metricName: 'DiskUsedPercentage' + dimensions: [ + { + name: 'dsname' + operator: 'include' + values: ['*'] + }] + operator: operator + threshold: threshold + timeAggregation: timeAggregation + criterionType: 'StaticThresholdCriterion' + } + ] + } + } +} + +var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' +resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') { + name: ambaTelemetryPidName + tags: { + _deployed_by_amba: 'true' + } + properties: { + mode: 'Incremental' + template: { + '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#' + contentVersion: '1.0.0.0' + resources: [] + } + } +} diff --git a/services/MachineLearningServices/workspaces/templates/arm/ModelDeployFailed_0337a76f-238e-4d4d-9cd1-48b205874dbb.json b/services/MachineLearningServices/workspaces/templates/arm/ModelDeployFailed_0337a76f-238e-4d4d-9cd1-48b205874dbb.json new file mode 100644 index 000000000..c0e7f0473 --- /dev/null +++ b/services/MachineLearningServices/workspaces/templates/arm/ModelDeployFailed_0337a76f-238e-4d4d-9cd1-48b205874dbb.json @@ -0,0 +1,199 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Number of model deployments that failed in this workspace.", + "metadata": { + "description": "Description of alert" + } + }, + "targetResourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name" + } + }, + "targetResourceRegion": { + "type": "string", + "metadata": { + "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS" + } + }, + "targetResourceType": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Resource type of target resources to be monitored." + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 3, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "operator": { + "type": "string", + "defaultValue": "GreaterThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "0", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Total", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT1M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "how often the metric alert is evaluated represented in ISO 8601 duration format" + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry." + } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]", + "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]" + }, + "resources": [ + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": "[parameters('alertName')]", + "location": "global", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "scopes": "[variables('varTargetResourceId')]", + "targetResourceType": "[parameters('targetResourceType')]", + "targetResourceRegion": "[parameters('targetResourceRegion')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria", + "allOf": [ + { + "name": "1st criterion", + "metricName": "Model Deploy Failed", + "dimensions": [], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "criterionType": "StaticThresholdCriterion" + } + ] + } + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2020-06-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/MachineLearningServices/workspaces/templates/arm/QuotaUtilizationPercentage_be3f1bfc-c21a-4399-9b9f-a33ebdc470cb.json b/services/MachineLearningServices/workspaces/templates/arm/QuotaUtilizationPercentage_be3f1bfc-c21a-4399-9b9f-a33ebdc470cb.json new file mode 100644 index 000000000..d2b2f5e8d --- /dev/null +++ b/services/MachineLearningServices/workspaces/templates/arm/QuotaUtilizationPercentage_be3f1bfc-c21a-4399-9b9f-a33ebdc470cb.json @@ -0,0 +1,199 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Percent of quota utilized.", + "metadata": { + "description": "Description of alert" + } + }, + "targetResourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name" + } + }, + "targetResourceRegion": { + "type": "string", + "metadata": { + "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS" + } + }, + "targetResourceType": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Resource type of target resources to be monitored." + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 3, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "operator": { + "type": "string", + "defaultValue": "GreaterThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "90", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT1M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "how often the metric alert is evaluated represented in ISO 8601 duration format" + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry." + } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]", + "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]" + }, + "resources": [ + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": "[parameters('alertName')]", + "location": "global", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "scopes": "[variables('varTargetResourceId')]", + "targetResourceType": "[parameters('targetResourceType')]", + "targetResourceRegion": "[parameters('targetResourceRegion')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria", + "allOf": [ + { + "name": "1st criterion", + "metricName": "Quota Utilization Percentage", + "dimensions": [], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "criterionType": "StaticThresholdCriterion" + } + ] + } + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2020-06-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/MachineLearningServices/workspaces/templates/arm/UnusableNodes_a171bc0c-676f-464b-a7b5-e50cd6c612a2.json b/services/MachineLearningServices/workspaces/templates/arm/UnusableNodes_a171bc0c-676f-464b-a7b5-e50cd6c612a2.json new file mode 100644 index 000000000..bda61d6bb --- /dev/null +++ b/services/MachineLearningServices/workspaces/templates/arm/UnusableNodes_a171bc0c-676f-464b-a7b5-e50cd6c612a2.json @@ -0,0 +1,199 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Number of unusable nodes. Unusable nodes are not functional due to some unresolvable issue. Azure will recycle these nodes.", + "metadata": { + "description": "Description of alert" + } + }, + "targetResourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name" + } + }, + "targetResourceRegion": { + "type": "string", + "metadata": { + "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS" + } + }, + "targetResourceType": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Resource type of target resources to be monitored." + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 3, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "operator": { + "type": "string", + "defaultValue": "GreaterThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "0", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Total", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT1M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "how often the metric alert is evaluated represented in ISO 8601 duration format" + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry." + } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]", + "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]" + }, + "resources": [ + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": "[parameters('alertName')]", + "location": "global", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "scopes": "[variables('varTargetResourceId')]", + "targetResourceType": "[parameters('targetResourceType')]", + "targetResourceRegion": "[parameters('targetResourceRegion')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria", + "allOf": [ + { + "name": "1st criterion", + "metricName": "Unusable Nodes", + "dimensions": [], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "criterionType": "StaticThresholdCriterion" + } + ] + } + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2020-06-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/MachineLearningServices/workspaces/templates/bicep/ModelDeployFailed_0337a76f-238e-4d4d-9cd1-48b205874dbb.bicep b/services/MachineLearningServices/workspaces/templates/bicep/ModelDeployFailed_0337a76f-238e-4d4d-9cd1-48b205874dbb.bicep new file mode 100644 index 000000000..04325f934 --- /dev/null +++ b/services/MachineLearningServices/workspaces/templates/bicep/ModelDeployFailed_0337a76f-238e-4d4d-9cd1-48b205874dbb.bicep @@ -0,0 +1,135 @@ +@description('Name of the alert') +@minLength(1) +param alertName string + +@description('Description of alert') +param alertDescription string = 'Number of model deployments that failed in this workspace.' + +@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name') +@minLength(1) +param targetResourceId array + +@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS') +param targetResourceRegion string + +@description('Resource type of target resources to be monitored.') +@minLength(1) +param targetResourceType string + +@description('Specifies whether the alert is enabled') +param isEnabled bool = true + +@description('Severity of alert {0,1,2,3,4}') +@allowed([ + 0 + 1 + 2 + 3 + 4 +]) +param alertSeverity int = 3 + +@description('Operator comparing the current value with the threshold value.') +@allowed([ + 'Equals' + 'GreaterThan' + 'GreaterThanOrEqual' + 'LessThan' + 'LessThanOrEqual' +]) +param operator string = 'GreaterThan' + +@description('The threshold value at which the alert is activated.') +param threshold int = 0 + +@description('How the data that is collected should be combined over time.') +@allowed([ + 'Average' + 'Minimum' + 'Maximum' + 'Total' + 'Count' +]) +param timeAggregation string = 'Total' + +@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' + 'P1D' +]) +param windowSize string = 'PT5M' + +@description('how often the metric alert is evaluated represented in ISO 8601 duration format') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' +]) +param evaluationFrequency string = 'PT1M' + +@description('"The current date and time using the utcNow function. Used for deployment name uniqueness') +param currentDateTimeUtcNow string = utcNow() + +@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.') +@allowed([ + 'Yes' + 'No' +]) +param telemetryOptOut string = 'No' + +resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = { + name: alertName + location: 'global' + tags: { + _deployed_by_amba: 'true' + } + properties: { + description: alertDescription + scopes: targetResourceId + targetResourceType: targetResourceType + targetResourceRegion: targetResourceRegion + severity: alertSeverity + enabled: isEnabled + evaluationFrequency: evaluationFrequency + windowSize: windowSize + criteria: { + 'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria' + allOf: [ + { + name: '1st criterion' + metricName: 'Model Deploy Failed' + dimensions: [[]] + operator: operator + threshold: threshold + timeAggregation: timeAggregation + criterionType: 'StaticThresholdCriterion' + } + ] + } + } +} + +var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' +resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') { + name: ambaTelemetryPidName + tags: { + _deployed_by_amba: 'true' + } + properties: { + mode: 'Incremental' + template: { + '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#' + contentVersion: '1.0.0.0' + resources: [] + } + } +} diff --git a/services/MachineLearningServices/workspaces/templates/bicep/QuotaUtilizationPercentage_be3f1bfc-c21a-4399-9b9f-a33ebdc470cb.bicep b/services/MachineLearningServices/workspaces/templates/bicep/QuotaUtilizationPercentage_be3f1bfc-c21a-4399-9b9f-a33ebdc470cb.bicep new file mode 100644 index 000000000..8fff070e3 --- /dev/null +++ b/services/MachineLearningServices/workspaces/templates/bicep/QuotaUtilizationPercentage_be3f1bfc-c21a-4399-9b9f-a33ebdc470cb.bicep @@ -0,0 +1,135 @@ +@description('Name of the alert') +@minLength(1) +param alertName string + +@description('Description of alert') +param alertDescription string = 'Percent of quota utilized.' + +@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name') +@minLength(1) +param targetResourceId array + +@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS') +param targetResourceRegion string + +@description('Resource type of target resources to be monitored.') +@minLength(1) +param targetResourceType string + +@description('Specifies whether the alert is enabled') +param isEnabled bool = true + +@description('Severity of alert {0,1,2,3,4}') +@allowed([ + 0 + 1 + 2 + 3 + 4 +]) +param alertSeverity int = 3 + +@description('Operator comparing the current value with the threshold value.') +@allowed([ + 'Equals' + 'GreaterThan' + 'GreaterThanOrEqual' + 'LessThan' + 'LessThanOrEqual' +]) +param operator string = 'GreaterThan' + +@description('The threshold value at which the alert is activated.') +param threshold int = 90 + +@description('How the data that is collected should be combined over time.') +@allowed([ + 'Average' + 'Minimum' + 'Maximum' + 'Total' + 'Count' +]) +param timeAggregation string = 'Average' + +@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' + 'P1D' +]) +param windowSize string = 'PT5M' + +@description('how often the metric alert is evaluated represented in ISO 8601 duration format') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' +]) +param evaluationFrequency string = 'PT1M' + +@description('"The current date and time using the utcNow function. Used for deployment name uniqueness') +param currentDateTimeUtcNow string = utcNow() + +@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.') +@allowed([ + 'Yes' + 'No' +]) +param telemetryOptOut string = 'No' + +resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = { + name: alertName + location: 'global' + tags: { + _deployed_by_amba: 'true' + } + properties: { + description: alertDescription + scopes: targetResourceId + targetResourceType: targetResourceType + targetResourceRegion: targetResourceRegion + severity: alertSeverity + enabled: isEnabled + evaluationFrequency: evaluationFrequency + windowSize: windowSize + criteria: { + 'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria' + allOf: [ + { + name: '1st criterion' + metricName: 'Quota Utilization Percentage' + dimensions: [[]] + operator: operator + threshold: threshold + timeAggregation: timeAggregation + criterionType: 'StaticThresholdCriterion' + } + ] + } + } +} + +var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' +resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') { + name: ambaTelemetryPidName + tags: { + _deployed_by_amba: 'true' + } + properties: { + mode: 'Incremental' + template: { + '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#' + contentVersion: '1.0.0.0' + resources: [] + } + } +} diff --git a/services/MachineLearningServices/workspaces/templates/bicep/UnusableNodes_a171bc0c-676f-464b-a7b5-e50cd6c612a2.bicep b/services/MachineLearningServices/workspaces/templates/bicep/UnusableNodes_a171bc0c-676f-464b-a7b5-e50cd6c612a2.bicep new file mode 100644 index 000000000..93dd546a6 --- /dev/null +++ b/services/MachineLearningServices/workspaces/templates/bicep/UnusableNodes_a171bc0c-676f-464b-a7b5-e50cd6c612a2.bicep @@ -0,0 +1,135 @@ +@description('Name of the alert') +@minLength(1) +param alertName string + +@description('Description of alert') +param alertDescription string = 'Number of unusable nodes. Unusable nodes are not functional due to some unresolvable issue. Azure will recycle these nodes.' + +@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name') +@minLength(1) +param targetResourceId array + +@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS') +param targetResourceRegion string + +@description('Resource type of target resources to be monitored.') +@minLength(1) +param targetResourceType string + +@description('Specifies whether the alert is enabled') +param isEnabled bool = true + +@description('Severity of alert {0,1,2,3,4}') +@allowed([ + 0 + 1 + 2 + 3 + 4 +]) +param alertSeverity int = 3 + +@description('Operator comparing the current value with the threshold value.') +@allowed([ + 'Equals' + 'GreaterThan' + 'GreaterThanOrEqual' + 'LessThan' + 'LessThanOrEqual' +]) +param operator string = 'GreaterThan' + +@description('The threshold value at which the alert is activated.') +param threshold int = 0 + +@description('How the data that is collected should be combined over time.') +@allowed([ + 'Average' + 'Minimum' + 'Maximum' + 'Total' + 'Count' +]) +param timeAggregation string = 'Total' + +@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' + 'P1D' +]) +param windowSize string = 'PT5M' + +@description('how often the metric alert is evaluated represented in ISO 8601 duration format') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' +]) +param evaluationFrequency string = 'PT1M' + +@description('"The current date and time using the utcNow function. Used for deployment name uniqueness') +param currentDateTimeUtcNow string = utcNow() + +@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.') +@allowed([ + 'Yes' + 'No' +]) +param telemetryOptOut string = 'No' + +resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = { + name: alertName + location: 'global' + tags: { + _deployed_by_amba: 'true' + } + properties: { + description: alertDescription + scopes: targetResourceId + targetResourceType: targetResourceType + targetResourceRegion: targetResourceRegion + severity: alertSeverity + enabled: isEnabled + evaluationFrequency: evaluationFrequency + windowSize: windowSize + criteria: { + 'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria' + allOf: [ + { + name: '1st criterion' + metricName: 'Unusable Nodes' + dimensions: [[]] + operator: operator + threshold: threshold + timeAggregation: timeAggregation + criterionType: 'StaticThresholdCriterion' + } + ] + } + } +} + +var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' +resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') { + name: ambaTelemetryPidName + tags: { + _deployed_by_amba: 'true' + } + properties: { + mode: 'Incremental' + template: { + '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#' + contentVersion: '1.0.0.0' + resources: [] + } + } +} diff --git a/services/Search/searchServices/templates/arm/ActivityLogSearchServiceDelete_5611eb31-51c7-4279-ab8c-97dba0a2c044.json b/services/Search/searchServices/templates/arm/ActivityLogSearchServiceDelete_5611eb31-51c7-4279-ab8c-97dba0a2c044.json new file mode 100644 index 000000000..1a057c27b --- /dev/null +++ b/services/Search/searchServices/templates/arm/ActivityLogSearchServiceDelete_5611eb31-51c7-4279-ab8c-97dba0a2c044.json @@ -0,0 +1,95 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "metadata": { + "description": "Unique name (within the Resource Group) for the Activity log alert." + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Activity Log Alert for Search Service Delete", + "metadata": { + "description": "Description of alert" + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Indicates whether or not the alert is enabled." + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry." + } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]" + }, + "resources": [ + { + "type": "Microsoft.Insights/activityLogAlerts", + "apiVersion": "2017-04-01", + "name": "[parameters('alertName')]", + "location": "Global", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "scopes": [ + "[subscription().id]" + ], + "enabled": "[parameters('isEnabled')]", + "condition": { + "allOf": [ + { + "field": "category", + "equals": "Administrative" + }, + { + "field": "operationName", + "equals": "Microsoft.Search/searchServices/delete" + }, + { + "field": "status", + "containsAny": ["succeeded"] + } + ] + } + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2020-06-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/Search/searchServices/templates/bicep/ActivityLogSearchServiceDelete_5611eb31-51c7-4279-ab8c-97dba0a2c044.bicep b/services/Search/searchServices/templates/bicep/ActivityLogSearchServiceDelete_5611eb31-51c7-4279-ab8c-97dba0a2c044.bicep new file mode 100644 index 000000000..e9da93e1e --- /dev/null +++ b/services/Search/searchServices/templates/bicep/ActivityLogSearchServiceDelete_5611eb31-51c7-4279-ab8c-97dba0a2c044.bicep @@ -0,0 +1,68 @@ +@description('Unique name (within the Resource Group) for the Activity log alert.') +@minLength(1) +param alertName string + +@description('Description of alert') +param alertDescription string = 'Activity Log Alert for Search Service Delete' + +@description('Indicates whether or not the alert is enabled.') +param isEnabled bool = true + +@description('"The current date and time using the utcNow function. Used for deployment name uniqueness') +param currentDateTimeUtcNow string = utcNow() + +@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.') +@allowed([ + 'Yes' + 'No' +]) +param telemetryOptOut string = 'No' + +resource symbolicname 'Microsoft.Insights/activityLogAlerts@2023-01-01-preview' = { + name: alertName + location: 'Global' + tags: { + _deployed_by_amba: 'true' + } + properties: { + description: alertDescription + scopes: [ + subscription().id + ] + enabled: isEnabled + condition: { + allOf: [ + { + { + field: 'category' + equals: 'Administrative' + } + { + field: 'operationName' + equals: 'Microsoft.Search/searchServices/delete' + } + { + field: 'status' + containsAny: ['succeeded'] + } + } + ] + } + } +} + +var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' +resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') { + name: ambaTelemetryPidName + tags: { + _deployed_by_amba: 'true' + } + properties: { + mode: 'Incremental' + template: { + '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#' + contentVersion: '1.0.0.0' + resources: [] + } + } +} diff --git a/services/StorageCache/AmlFilesystems/templates/arm/MDTBytesUsed_ebd68fdd-9672-43e8-b7d5-6e479210535d.json b/services/StorageCache/AmlFilesystems/templates/arm/MDTBytesUsed_ebd68fdd-9672-43e8-b7d5-6e479210535d.json new file mode 100644 index 000000000..37f0174e6 --- /dev/null +++ b/services/StorageCache/AmlFilesystems/templates/arm/MDTBytesUsed_ebd68fdd-9672-43e8-b7d5-6e479210535d.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Log an alert if MDTBytesUsed is above 85%", + "metadata": { + "description": "Description of alert" + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "checkWorkspaceAlertsStorageConfigured": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Specifies whether to check linked storage and fail creation if the storage was not found" + } + }, + "resourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz" + } + }, + "muteActionsDuration": { + "type": "string", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H" + ], + "metadata": { + "description": "Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired." + } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "autoMitigate": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert will automatically resolve" + } + }, + "query": { + "type": "string", + "minLength": 1, + "defaultValue": " let threshold_used = 0.85; AzureMetrics | where MetricName == \"MDTBytesTotal\" or MetricName == \"MDTBytesUsed\" | summarize MDTBytesTotal = maxif(Total, MetricName == \"MDTBytesTotal\"), MDTBytesUsed = maxif(Total, MetricName == \"MDTBytesUsed\") | extend UsedRatio = MDTBytesUsed / MDTBytesTotal | where UsedRatio > threshold_used | project UsedRatio, MDTBytesUsed, MDTBytesTotal ", + "metadata": { + "description": "Name of the metric used in the comparison to activate the alert." + } + }, + "metricMeasureColumn": { + "type": "string", + "defaultValue": "AggregatedValue", + "metadata": { + "description": "Name of the measure column used in the alert evaluation." + } + }, + "resourceIdColumn": { + "type": "string", + "defaultValue": "", + "metadata": { + "description": "Name of the resource ID column used in the alert targeting the alerts." + } + }, + "operator": { + "type": "string", + "defaultValue": "GreaterThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "85", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "numberOfEvaluationPeriods": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of periods to check in the alert evaluation." + } + }, + "minFailingPeriodsToAlert": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods)." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT1M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "how often the metric alert is evaluated represented in ISO 8601 duration format" + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry." + } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]" + }, + "resources": [ + { + "type": "Microsoft.Insights/scheduledQueryRules", + "apiVersion": "2021-08-01", + "name": "[parameters('alertName')]", + "location": "[resourceGroup().location]", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "scopes": [ + "[parameters('resourceId')]" + ], + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "allOf": [ + { + "query": "[parameters('query')]", + "metricMeasureColumn": "[parameters('metricMeasureColumn')]", + "resourceIdColumn": "[parameters('resourceIdColumn')]", + "dimensions": [{"name": "UsedRatio", "operator": "Include", "values": ["*"]}, {"name": "MDTBytesUsed", "operator": "Include", "values": ["*"]}, {"name": "MDTBytesTotal", "operator": "Include", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "failingPeriods": { + "numberOfEvaluationPeriods": "[parameters('numberOfEvaluationPeriods')]", + "minFailingPeriodsToAlert": "[parameters('minFailingPeriodsToAlert')]" + } + } + ] + }, + "muteActionsDuration": "[parameters('muteActionsDuration')]", + "autoMitigate": "[parameters('autoMitigate')]", + "checkWorkspaceAlertsStorageConfigured": "[parameters('checkWorkspaceAlertsStorageConfigured')]" + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2020-06-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/StorageCache/AmlFilesystems/templates/arm/MDTFilesAvailable_ecec6f93-af7e-4071-b35d-cd70b3f16581.json b/services/StorageCache/AmlFilesystems/templates/arm/MDTFilesAvailable_ecec6f93-af7e-4071-b35d-cd70b3f16581.json new file mode 100644 index 000000000..d59cb95d5 --- /dev/null +++ b/services/StorageCache/AmlFilesystems/templates/arm/MDTFilesAvailable_ecec6f93-af7e-4071-b35d-cd70b3f16581.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Log an alert if MDTBytesAvailable is below 15%", + "metadata": { + "description": "Description of alert" + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "checkWorkspaceAlertsStorageConfigured": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Specifies whether to check linked storage and fail creation if the storage was not found" + } + }, + "resourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz" + } + }, + "muteActionsDuration": { + "type": "string", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H" + ], + "metadata": { + "description": "Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired." + } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "autoMitigate": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert will automatically resolve" + } + }, + "query": { + "type": "string", + "minLength": 1, + "defaultValue": " let threshold_used = 0.15; AzureMetrics | where MetricName == \"MDTBytesAvailable\" or MetricName == \"MDTBytesTotal\" | summarize MDTBytesAvailable = maxif(Total, MetricName == \"MDTBytesAvailable\"), MDTBytesTotal = maxif(Total, MetricName == \"MDTBytesTotal\") | extend AvailableRatio = MDTBytesAvailable / MDTBytesTotal | where AvailableRatio < threshold_available | project AvailableRatio, MDTBytesAvailable, MDTBytesTotal ", + "metadata": { + "description": "Name of the metric used in the comparison to activate the alert." + } + }, + "metricMeasureColumn": { + "type": "string", + "defaultValue": "AggregatedValue", + "metadata": { + "description": "Name of the measure column used in the alert evaluation." + } + }, + "resourceIdColumn": { + "type": "string", + "defaultValue": "", + "metadata": { + "description": "Name of the resource ID column used in the alert targeting the alerts." + } + }, + "operator": { + "type": "string", + "defaultValue": "LessThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "15", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "numberOfEvaluationPeriods": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of periods to check in the alert evaluation." + } + }, + "minFailingPeriodsToAlert": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods)." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT1M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "how often the metric alert is evaluated represented in ISO 8601 duration format" + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry." + } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]" + }, + "resources": [ + { + "type": "Microsoft.Insights/scheduledQueryRules", + "apiVersion": "2021-08-01", + "name": "[parameters('alertName')]", + "location": "[resourceGroup().location]", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "scopes": [ + "[parameters('resourceId')]" + ], + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "allOf": [ + { + "query": "[parameters('query')]", + "metricMeasureColumn": "[parameters('metricMeasureColumn')]", + "resourceIdColumn": "[parameters('resourceIdColumn')]", + "dimensions": [{"name": "AvailableRatio", "operator": "Include", "values": ["*"]}, {"name": "MDTBytesAvailable", "operator": "Include", "values": ["*"]}, {"name": "MDTBytesTotal", "operator": "Include", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "failingPeriods": { + "numberOfEvaluationPeriods": "[parameters('numberOfEvaluationPeriods')]", + "minFailingPeriodsToAlert": "[parameters('minFailingPeriodsToAlert')]" + } + } + ] + }, + "muteActionsDuration": "[parameters('muteActionsDuration')]", + "autoMitigate": "[parameters('autoMitigate')]", + "checkWorkspaceAlertsStorageConfigured": "[parameters('checkWorkspaceAlertsStorageConfigured')]" + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2020-06-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/StorageCache/AmlFilesystems/templates/arm/MDTFilesFree_2feba8fd-ff1e-4f48-bc01-6e2996edafa6.json b/services/StorageCache/AmlFilesystems/templates/arm/MDTFilesFree_2feba8fd-ff1e-4f48-bc01-6e2996edafa6.json new file mode 100644 index 000000000..c36818c6a --- /dev/null +++ b/services/StorageCache/AmlFilesystems/templates/arm/MDTFilesFree_2feba8fd-ff1e-4f48-bc01-6e2996edafa6.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Log an alert if MDTFilesFree is below 15%", + "metadata": { + "description": "Description of alert" + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "checkWorkspaceAlertsStorageConfigured": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Specifies whether to check linked storage and fail creation if the storage was not found" + } + }, + "resourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz" + } + }, + "muteActionsDuration": { + "type": "string", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H" + ], + "metadata": { + "description": "Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired." + } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "autoMitigate": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert will automatically resolve" + } + }, + "query": { + "type": "string", + "minLength": 1, + "defaultValue": " let threshold_used = 0.15; AzureMetrics | where MetricName == \"MDTFilesFree\" or MetricName == \"MDTFilesTotal\" | summarize MDTFilesFree = maxif(Total, MetricName == \"MDTFilesFree\"), MDTFilesTotal = maxif(Total, MetricName == \"MDTFilesTotal\") | extend FreeRatio = MDTFilesFree / MDTFilesTotal | where FreeRatio < threshold_free | project FreeRatio, MDTFilesFree, MDTFilesTotal ", + "metadata": { + "description": "Name of the metric used in the comparison to activate the alert." + } + }, + "metricMeasureColumn": { + "type": "string", + "defaultValue": "AggregatedValue", + "metadata": { + "description": "Name of the measure column used in the alert evaluation." + } + }, + "resourceIdColumn": { + "type": "string", + "defaultValue": "", + "metadata": { + "description": "Name of the resource ID column used in the alert targeting the alerts." + } + }, + "operator": { + "type": "string", + "defaultValue": "LessThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "15", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "numberOfEvaluationPeriods": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of periods to check in the alert evaluation." + } + }, + "minFailingPeriodsToAlert": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods)." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT1M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "how often the metric alert is evaluated represented in ISO 8601 duration format" + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry." + } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]" + }, + "resources": [ + { + "type": "Microsoft.Insights/scheduledQueryRules", + "apiVersion": "2021-08-01", + "name": "[parameters('alertName')]", + "location": "[resourceGroup().location]", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "scopes": [ + "[parameters('resourceId')]" + ], + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "allOf": [ + { + "query": "[parameters('query')]", + "metricMeasureColumn": "[parameters('metricMeasureColumn')]", + "resourceIdColumn": "[parameters('resourceIdColumn')]", + "dimensions": [{"name": "FreeRatio", "operator": "Include", "values": ["*"]}, {"name": "MDTFilesFree", "operator": "Include", "values": ["*"]}, {"name": "MDTFilesTotal", "operator": "Include", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "failingPeriods": { + "numberOfEvaluationPeriods": "[parameters('numberOfEvaluationPeriods')]", + "minFailingPeriodsToAlert": "[parameters('minFailingPeriodsToAlert')]" + } + } + ] + }, + "muteActionsDuration": "[parameters('muteActionsDuration')]", + "autoMitigate": "[parameters('autoMitigate')]", + "checkWorkspaceAlertsStorageConfigured": "[parameters('checkWorkspaceAlertsStorageConfigured')]" + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2020-06-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/StorageCache/AmlFilesystems/templates/arm/MDTFilesUsed_48fc094d-8a00-4d3c-86d3-3230c7e5881a.json b/services/StorageCache/AmlFilesystems/templates/arm/MDTFilesUsed_48fc094d-8a00-4d3c-86d3-3230c7e5881a.json new file mode 100644 index 000000000..83bbe912d --- /dev/null +++ b/services/StorageCache/AmlFilesystems/templates/arm/MDTFilesUsed_48fc094d-8a00-4d3c-86d3-3230c7e5881a.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Log an alert if MDTFilesUsed is above 85%", + "metadata": { + "description": "Description of alert" + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "checkWorkspaceAlertsStorageConfigured": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Specifies whether to check linked storage and fail creation if the storage was not found" + } + }, + "resourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz" + } + }, + "muteActionsDuration": { + "type": "string", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H" + ], + "metadata": { + "description": "Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired." + } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "autoMitigate": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert will automatically resolve" + } + }, + "query": { + "type": "string", + "minLength": 1, + "defaultValue": " let threshold_used = 0.85; AzureMetrics | where MetricName == \"MDTFilesTotal\" or MetricName == \"MDTFilesUsed\" | summarize MDTFilesTotal = maxif(Total, MetricName == \"MDTFilesTotal\"), MDTFilesUsed = maxif(Total, MetricName == \"MDTFilesUsed\") | extend FreeRatio = MDTFilesFree / MDTFilesTotal | where UsedRatio > threshold_used | project UsedRatio, MDTFilesUsed, MDTFilesTotal ", + "metadata": { + "description": "Name of the metric used in the comparison to activate the alert." + } + }, + "metricMeasureColumn": { + "type": "string", + "defaultValue": "AggregatedValue", + "metadata": { + "description": "Name of the measure column used in the alert evaluation." + } + }, + "resourceIdColumn": { + "type": "string", + "defaultValue": "", + "metadata": { + "description": "Name of the resource ID column used in the alert targeting the alerts." + } + }, + "operator": { + "type": "string", + "defaultValue": "GreaterThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "85", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "numberOfEvaluationPeriods": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of periods to check in the alert evaluation." + } + }, + "minFailingPeriodsToAlert": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods)." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT1M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "how often the metric alert is evaluated represented in ISO 8601 duration format" + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry." + } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]" + }, + "resources": [ + { + "type": "Microsoft.Insights/scheduledQueryRules", + "apiVersion": "2021-08-01", + "name": "[parameters('alertName')]", + "location": "[resourceGroup().location]", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "scopes": [ + "[parameters('resourceId')]" + ], + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "allOf": [ + { + "query": "[parameters('query')]", + "metricMeasureColumn": "[parameters('metricMeasureColumn')]", + "resourceIdColumn": "[parameters('resourceIdColumn')]", + "dimensions": [{"name": "UsedRatio", "operator": "Include", "values": ["*"]}, {"name": "MDTFilesUsed", "operator": "Include", "values": ["*"]}, {"name": "MDTFilesTotal", "operator": "Include", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "failingPeriods": { + "numberOfEvaluationPeriods": "[parameters('numberOfEvaluationPeriods')]", + "minFailingPeriodsToAlert": "[parameters('minFailingPeriodsToAlert')]" + } + } + ] + }, + "muteActionsDuration": "[parameters('muteActionsDuration')]", + "autoMitigate": "[parameters('autoMitigate')]", + "checkWorkspaceAlertsStorageConfigured": "[parameters('checkWorkspaceAlertsStorageConfigured')]" + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2020-06-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/StorageCache/AmlFilesystems/templates/arm/OSTBytesAvailable_4eeca790-a804-4453-b339-73ea425610bc.json b/services/StorageCache/AmlFilesystems/templates/arm/OSTBytesAvailable_4eeca790-a804-4453-b339-73ea425610bc.json new file mode 100644 index 000000000..3e3a9d50f --- /dev/null +++ b/services/StorageCache/AmlFilesystems/templates/arm/OSTBytesAvailable_4eeca790-a804-4453-b339-73ea425610bc.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Log an alert if OSTBytesAvailable is below 15%", + "metadata": { + "description": "Description of alert" + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "checkWorkspaceAlertsStorageConfigured": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Specifies whether to check linked storage and fail creation if the storage was not found" + } + }, + "resourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz" + } + }, + "muteActionsDuration": { + "type": "string", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H" + ], + "metadata": { + "description": "Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired." + } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "autoMitigate": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert will automatically resolve" + } + }, + "query": { + "type": "string", + "minLength": 1, + "defaultValue": " let threshold_free = 0.15; AzureMetrics | where MetricName == \"OSTBytesAvailable\" or MetricName == \"OSTBytesTotal\" | summarize OSTBytesAvailable = maxif(Total, MetricName == \"OSTBytesAvailable\"), OSTBytesTotal = maxif(Total, MetricName == \"OSTBytesTotal\") | extend AvailableRatio = OSTBytesAvailable / OSTBytesTotal | where AvailableRatio < threshold_available | project AvailableRatio, OSTBytesAvailable, OSTBytesTotal ", + "metadata": { + "description": "Name of the metric used in the comparison to activate the alert." + } + }, + "metricMeasureColumn": { + "type": "string", + "defaultValue": "AggregatedValue", + "metadata": { + "description": "Name of the measure column used in the alert evaluation." + } + }, + "resourceIdColumn": { + "type": "string", + "defaultValue": "", + "metadata": { + "description": "Name of the resource ID column used in the alert targeting the alerts." + } + }, + "operator": { + "type": "string", + "defaultValue": "LessThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "15", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "numberOfEvaluationPeriods": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of periods to check in the alert evaluation." + } + }, + "minFailingPeriodsToAlert": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods)." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT1M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "how often the metric alert is evaluated represented in ISO 8601 duration format" + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry." + } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]" + }, + "resources": [ + { + "type": "Microsoft.Insights/scheduledQueryRules", + "apiVersion": "2021-08-01", + "name": "[parameters('alertName')]", + "location": "[resourceGroup().location]", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "scopes": [ + "[parameters('resourceId')]" + ], + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "allOf": [ + { + "query": "[parameters('query')]", + "metricMeasureColumn": "[parameters('metricMeasureColumn')]", + "resourceIdColumn": "[parameters('resourceIdColumn')]", + "dimensions": [{"name": "AvailableRatio", "operator": "Include", "values": ["*"]}, {"name": "OSTBytesAvailable", "operator": "Include", "values": ["*"]}, {"name": "OSTBytesTotal", "operator": "Include", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "failingPeriods": { + "numberOfEvaluationPeriods": "[parameters('numberOfEvaluationPeriods')]", + "minFailingPeriodsToAlert": "[parameters('minFailingPeriodsToAlert')]" + } + } + ] + }, + "muteActionsDuration": "[parameters('muteActionsDuration')]", + "autoMitigate": "[parameters('autoMitigate')]", + "checkWorkspaceAlertsStorageConfigured": "[parameters('checkWorkspaceAlertsStorageConfigured')]" + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2020-06-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/StorageCache/AmlFilesystems/templates/arm/OSTBytesUsed_59298086-ec77-4f47-b2ef-b853b79e31cb.json b/services/StorageCache/AmlFilesystems/templates/arm/OSTBytesUsed_59298086-ec77-4f47-b2ef-b853b79e31cb.json new file mode 100644 index 000000000..42cff46fb --- /dev/null +++ b/services/StorageCache/AmlFilesystems/templates/arm/OSTBytesUsed_59298086-ec77-4f47-b2ef-b853b79e31cb.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Log an alert if OSTByteUsed is above 85%", + "metadata": { + "description": "Description of alert" + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "checkWorkspaceAlertsStorageConfigured": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Specifies whether to check linked storage and fail creation if the storage was not found" + } + }, + "resourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz" + } + }, + "muteActionsDuration": { + "type": "string", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H" + ], + "metadata": { + "description": "Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired." + } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "autoMitigate": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert will automatically resolve" + } + }, + "query": { + "type": "string", + "minLength": 1, + "defaultValue": " let threshold_used = 0.85; AzureMetrics | where MetricName == \"OSTBytesTotal\" or MetricName == \"OSTBytesUsed\" | summarize OSTBytesTotal = maxif(Total, MetricName == \"OSTBytesTotal\"), OSTBytesUsed = maxif(Total, MetricName == \"OSTBytesUsed\") | extend UsedRatio = OSTBytesUsed / OSTBytesTotal | where UsedRatio > threshold_used | project UsedRatio, OSTBytesUsed, OSTBytesTotal ", + "metadata": { + "description": "Name of the metric used in the comparison to activate the alert." + } + }, + "metricMeasureColumn": { + "type": "string", + "defaultValue": "AggregatedValue", + "metadata": { + "description": "Name of the measure column used in the alert evaluation." + } + }, + "resourceIdColumn": { + "type": "string", + "defaultValue": "", + "metadata": { + "description": "Name of the resource ID column used in the alert targeting the alerts." + } + }, + "operator": { + "type": "string", + "defaultValue": "GreaterThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "85", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "numberOfEvaluationPeriods": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of periods to check in the alert evaluation." + } + }, + "minFailingPeriodsToAlert": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods)." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT1M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "how often the metric alert is evaluated represented in ISO 8601 duration format" + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry." + } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]" + }, + "resources": [ + { + "type": "Microsoft.Insights/scheduledQueryRules", + "apiVersion": "2021-08-01", + "name": "[parameters('alertName')]", + "location": "[resourceGroup().location]", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "scopes": [ + "[parameters('resourceId')]" + ], + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "allOf": [ + { + "query": "[parameters('query')]", + "metricMeasureColumn": "[parameters('metricMeasureColumn')]", + "resourceIdColumn": "[parameters('resourceIdColumn')]", + "dimensions": [{"name": "UsedRatio", "operator": "Include", "values": ["*"]}, {"name": "OSTBytesUsed", "operator": "Include", "values": ["*"]}, {"name": "OSTBytesTotal", "operator": "Include", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "failingPeriods": { + "numberOfEvaluationPeriods": "[parameters('numberOfEvaluationPeriods')]", + "minFailingPeriodsToAlert": "[parameters('minFailingPeriodsToAlert')]" + } + } + ] + }, + "muteActionsDuration": "[parameters('muteActionsDuration')]", + "autoMitigate": "[parameters('autoMitigate')]", + "checkWorkspaceAlertsStorageConfigured": "[parameters('checkWorkspaceAlertsStorageConfigured')]" + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2020-06-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/StorageCache/AmlFilesystems/templates/arm/OSTFilesFree_8f231351-c123-4e4c-8631-9978e641a3ca.json b/services/StorageCache/AmlFilesystems/templates/arm/OSTFilesFree_8f231351-c123-4e4c-8631-9978e641a3ca.json new file mode 100644 index 000000000..b5e4bca1a --- /dev/null +++ b/services/StorageCache/AmlFilesystems/templates/arm/OSTFilesFree_8f231351-c123-4e4c-8631-9978e641a3ca.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Log an alert if OSTFilesFree is below 15%", + "metadata": { + "description": "Description of alert" + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "checkWorkspaceAlertsStorageConfigured": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Specifies whether to check linked storage and fail creation if the storage was not found" + } + }, + "resourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz" + } + }, + "muteActionsDuration": { + "type": "string", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H" + ], + "metadata": { + "description": "Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired." + } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "autoMitigate": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert will automatically resolve" + } + }, + "query": { + "type": "string", + "minLength": 1, + "defaultValue": " let threshold_free = 0.15; AzureMetrics | where MetricName == \"OSTFilesFree\" or MetricName == \"OSTFilesTotal\" | summarize OSTFilesFree = maxif(Total, MetricName == \"OSTFilesFree\"), OSTFilesTotal = maxif(Total, MetricName == \"OSTFilesTotal\") | extend FreeRatio = OSTFilesFree / OSTFilesTotal | where FreeRatio < threshold_free | project FreeRatio, OSTFilesFree, OSTFilesTotal ", + "metadata": { + "description": "Name of the metric used in the comparison to activate the alert." + } + }, + "metricMeasureColumn": { + "type": "string", + "defaultValue": "AggregatedValue", + "metadata": { + "description": "Name of the measure column used in the alert evaluation." + } + }, + "resourceIdColumn": { + "type": "string", + "defaultValue": "", + "metadata": { + "description": "Name of the resource ID column used in the alert targeting the alerts." + } + }, + "operator": { + "type": "string", + "defaultValue": "LessThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "15", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "numberOfEvaluationPeriods": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of periods to check in the alert evaluation." + } + }, + "minFailingPeriodsToAlert": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods)." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT1M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "how often the metric alert is evaluated represented in ISO 8601 duration format" + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry." + } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]" + }, + "resources": [ + { + "type": "Microsoft.Insights/scheduledQueryRules", + "apiVersion": "2021-08-01", + "name": "[parameters('alertName')]", + "location": "[resourceGroup().location]", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "scopes": [ + "[parameters('resourceId')]" + ], + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "allOf": [ + { + "query": "[parameters('query')]", + "metricMeasureColumn": "[parameters('metricMeasureColumn')]", + "resourceIdColumn": "[parameters('resourceIdColumn')]", + "dimensions": [{"name": "FreeRatio", "operator": "Include", "values": ["*"]}, {"name": "OSTFilesFree", "operator": "Include", "values": ["*"]}, {"name": "OSTFilesTotal", "operator": "Include", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "failingPeriods": { + "numberOfEvaluationPeriods": "[parameters('numberOfEvaluationPeriods')]", + "minFailingPeriodsToAlert": "[parameters('minFailingPeriodsToAlert')]" + } + } + ] + }, + "muteActionsDuration": "[parameters('muteActionsDuration')]", + "autoMitigate": "[parameters('autoMitigate')]", + "checkWorkspaceAlertsStorageConfigured": "[parameters('checkWorkspaceAlertsStorageConfigured')]" + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2020-06-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/StorageCache/AmlFilesystems/templates/arm/OSTFilesUsed_9d086772-1887-4893-8b9f-7e5169398bae.json b/services/StorageCache/AmlFilesystems/templates/arm/OSTFilesUsed_9d086772-1887-4893-8b9f-7e5169398bae.json new file mode 100644 index 000000000..3c699685d --- /dev/null +++ b/services/StorageCache/AmlFilesystems/templates/arm/OSTFilesUsed_9d086772-1887-4893-8b9f-7e5169398bae.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Log an alert if OSTFilesUsed is above 85%", + "metadata": { + "description": "Description of alert" + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "checkWorkspaceAlertsStorageConfigured": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Specifies whether to check linked storage and fail creation if the storage was not found" + } + }, + "resourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz" + } + }, + "muteActionsDuration": { + "type": "string", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H" + ], + "metadata": { + "description": "Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired." + } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 2, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "autoMitigate": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert will automatically resolve" + } + }, + "query": { + "type": "string", + "minLength": 1, + "defaultValue": " let threshold_used = 0.85; AzureMetrics | where MetricName == \"OSTFilesTotal\" or MetricName == \"OSTFilesUsed\" | summarize OSTFilesTotal = maxif(Total, MetricName == \"OSTFilesTotal\"), OSTFilesUsed = maxif(Total, MetricName == \"OSTFilesUsed\") | extend UsedRatio = OSTFilesUsed / OSTFilesTotal | where UsedRatio > threshold_used | project UsedRatio, OSTFilesUsed, OSTFilesTotal ", + "metadata": { + "description": "Name of the metric used in the comparison to activate the alert." + } + }, + "metricMeasureColumn": { + "type": "string", + "defaultValue": "AggregatedValue", + "metadata": { + "description": "Name of the measure column used in the alert evaluation." + } + }, + "resourceIdColumn": { + "type": "string", + "defaultValue": "", + "metadata": { + "description": "Name of the resource ID column used in the alert targeting the alerts." + } + }, + "operator": { + "type": "string", + "defaultValue": "GreaterThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "85", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "numberOfEvaluationPeriods": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of periods to check in the alert evaluation." + } + }, + "minFailingPeriodsToAlert": { + "type": "int", + "defaultValue": 1, + "metadata": { + "description": "The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods)." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Average", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT1M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "how often the metric alert is evaluated represented in ISO 8601 duration format" + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry." + } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]" + }, + "resources": [ + { + "type": "Microsoft.Insights/scheduledQueryRules", + "apiVersion": "2021-08-01", + "name": "[parameters('alertName')]", + "location": "[resourceGroup().location]", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "scopes": [ + "[parameters('resourceId')]" + ], + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "allOf": [ + { + "query": "[parameters('query')]", + "metricMeasureColumn": "[parameters('metricMeasureColumn')]", + "resourceIdColumn": "[parameters('resourceIdColumn')]", + "dimensions": [{"name": "UsedRatio", "operator": "Include", "values": ["*"]}, {"name": "OSTFilesUsed", "operator": "Include", "values": ["*"]}, {"name": "OSTFilesTotal", "operator": "Include", "values": ["*"]}], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "failingPeriods": { + "numberOfEvaluationPeriods": "[parameters('numberOfEvaluationPeriods')]", + "minFailingPeriodsToAlert": "[parameters('minFailingPeriodsToAlert')]" + } + } + ] + }, + "muteActionsDuration": "[parameters('muteActionsDuration')]", + "autoMitigate": "[parameters('autoMitigate')]", + "checkWorkspaceAlertsStorageConfigured": "[parameters('checkWorkspaceAlertsStorageConfigured')]" + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2020-06-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/StorageCache/AmlFilesystems/templates/arm/Uptime_7f951991-c6ce-4c72-9f55-7eade2c4f57c.json b/services/StorageCache/AmlFilesystems/templates/arm/Uptime_7f951991-c6ce-4c72-9f55-7eade2c4f57c.json new file mode 100644 index 000000000..c174a96c2 --- /dev/null +++ b/services/StorageCache/AmlFilesystems/templates/arm/Uptime_7f951991-c6ce-4c72-9f55-7eade2c4f57c.json @@ -0,0 +1,199 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "alertName": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Name of the alert" + } + }, + "alertDescription": { + "type": "string", + "defaultValue": "Total number of client input/output operations per second", + "metadata": { + "description": "Description of alert" + } + }, + "targetResourceId": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "List of Azure resource Ids seperated by a comma. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name" + } + }, + "targetResourceRegion": { + "type": "string", + "metadata": { + "description": "Azure region in which target resources to be monitored are in (without spaces). For example: EastUS" + } + }, + "targetResourceType": { + "type": "string", + "minLength": 1, + "metadata": { + "description": "Resource type of target resources to be monitored." + } + }, + "isEnabled": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Specifies whether the alert is enabled" + } + }, + "alertSeverity": { + "type": "int", + "defaultValue": 1, + "allowedValues": [ + 0, + 1, + 2, + 3, + 4 + ], + "metadata": { + "description": "Severity of alert {0,1,2,3,4}" + } + }, + "operator": { + "type": "string", + "defaultValue": "LessThan", + "allowedValues": [ + "Equals", + "GreaterThan", + "GreaterThanOrEqual", + "LessThan", + "LessThanOrEqual" + ], + "metadata": { + "description": "Operator comparing the current value with the threshold value." + } + }, + "threshold": { + "type": "string", + "defaultValue": "99", + "metadata": { + "description": "The threshold value at which the alert is activated." + } + }, + "timeAggregation": { + "type": "string", + "defaultValue": "Total", + "allowedValues": [ + "Average", + "Minimum", + "Maximum", + "Total", + "Count" + ], + "metadata": { + "description": "How the data that is collected should be combined over time." + } + }, + "windowSize": { + "type": "string", + "defaultValue": "PT5M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H", + "PT6H", + "PT12H", + "PT24H", + "PT1D" + ], + "metadata": { + "description": "Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format." + } + }, + "evaluationFrequency": { + "type": "string", + "defaultValue": "PT1M", + "allowedValues": [ + "PT1M", + "PT5M", + "PT15M", + "PT30M", + "PT1H" + ], + "metadata": { + "description": "how often the metric alert is evaluated represented in ISO 8601 duration format" + } + }, + "currentDateTimeUtcNow": { + "type": "string", + "defaultValue": "[utcNow()]", + "metadata": { + "description": "The current date and time using the utcNow function. Used for deployment name uniqueness" + } + }, + "telemetryOptOut": { + "type": "string", + "defaultValue": "No", + "allowedValues": [ + "Yes", + "No" + ], + "metadata": { + "description": "The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry." + } + } + }, + "variables": { + "pidDeploymentName": "[take(concat('pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-', uniqueString(resourceGroup().id, parameters('alertName'), parameters('currentDateTimeUtcNow'))), 64)]", + "varTargetResourceId": "[split(parameters('targetResourceId'), ',')]" + }, + "resources": [ + { + "type": "Microsoft.Insights/metricAlerts", + "apiVersion": "2018-03-01", + "name": "[parameters('alertName')]", + "location": "global", + "tags": { + "_deployed_by_amba": true + }, + "properties": { + "description": "[parameters('alertDescription')]", + "scopes": "[variables('varTargetResourceId')]", + "targetResourceType": "[parameters('targetResourceType')]", + "targetResourceRegion": "[parameters('targetResourceRegion')]", + "severity": "[parameters('alertSeverity')]", + "enabled": "[parameters('isEnabled')]", + "evaluationFrequency": "[parameters('evaluationFrequency')]", + "windowSize": "[parameters('windowSize')]", + "criteria": { + "odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria", + "allOf": [ + { + "name": "1st criterion", + "metricName": "Uptime", + "dimensions": [], + "operator": "[parameters('operator')]", + "threshold": "[parameters('threshold')]", + "timeAggregation": "[parameters('timeAggregation')]", + "criterionType": "StaticThresholdCriterion" + } + ] + } + } + }, + { + "condition": "[equals(parameters('telemetryOptOut'), 'No')]", + "apiVersion": "2020-06-01", + "name": "[variables('pidDeploymentName')]", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + } + ] +} diff --git a/services/StorageCache/AmlFilesystems/templates/bicep/MDTBytesUsed_ebd68fdd-9672-43e8-b7d5-6e479210535d.bicep b/services/StorageCache/AmlFilesystems/templates/bicep/MDTBytesUsed_ebd68fdd-9672-43e8-b7d5-6e479210535d.bicep new file mode 100644 index 000000000..3dde5a525 --- /dev/null +++ b/services/StorageCache/AmlFilesystems/templates/bicep/MDTBytesUsed_ebd68fdd-9672-43e8-b7d5-6e479210535d.bicep @@ -0,0 +1,183 @@ +@description('Name of the alert') +@minLength(1) +param alertName string + +@description('Description of alert') +param alertDescription string = 'Log an alert if MDTBytesUsed is above 85%' + +@description('Specifies whether the alert is enabled') +param isEnabled bool = true + +@description('Specifies whether to check linked storage and fail creation if the storage was not found') +param checkWorkspaceAlertsStorageConfigured bool = false + +@description('Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz') +@minLength(1) +param resourceId string + +@description('Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' +]) +param muteActionsDuration string + +@description('Severity of alert {0,1,2,3,4}') +@allowed([ + 0 + 1 + 2 + 3 + 4 +]) +param alertSeverity int = 2 + +@description('Specifies whether the alert will automatically resolve') +param autoMitigate bool = true + +@description('Name of the metric used in the comparison to activate the alert.') +@minLength(1) +param query string = ' let threshold_used = 0.85; AzureMetrics | where MetricName == "MDTBytesTotal" or MetricName == "MDTBytesUsed" | summarize MDTBytesTotal = maxif(Total, MetricName == "MDTBytesTotal"), MDTBytesUsed = maxif(Total, MetricName == "MDTBytesUsed") | extend UsedRatio = MDTBytesUsed / MDTBytesTotal | where UsedRatio > threshold_used | project UsedRatio, MDTBytesUsed, MDTBytesTotal ' + +@description('Name of the measure column used in the alert evaluation.') +param metricMeasureColumn string = 'AggregatedValue' + +@description('Name of the resource ID column used in the alert targeting the alerts.') +param resourceIdColumn string = '' + +@description('Operator comparing the current value with the threshold value.') +@allowed([ + 'Equals' + 'GreaterThan' + 'GreaterThanOrEqual' + 'LessThan' + 'LessThanOrEqual' +]) +param operator string = 'GreaterThan' + +@description('The threshold value at which the alert is activated.') +param threshold int = 85 + +@description('The number of periods to check in the alert evaluation.') +param numberOfEvaluationPeriods int = 1 + +@description('The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods).') +param minFailingPeriodsToAlert int = 1 + +@description('How the data that is collected should be combined over time.') +@allowed([ + 'Average' + 'Minimum' + 'Maximum' + 'Total' + 'Count' +]) +param timeAggregation string = 'Average' + +@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' + 'P1D' +]) +param windowSize string = 'PT1M' + +@description('how often the metric alert is evaluated represented in ISO 8601 duration format') +@allowed([ + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' +]) +param evaluationFrequency string = 'PT5M' + +@description('"The current date and time using the utcNow function. Used for deployment name uniqueness') +param currentDateTimeUtcNow string = utcNow() + +@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.') +@allowed([ + 'Yes' + 'No' +]) +param telemetryOptOut string = 'No' + +resource alert 'Microsoft.Insights/scheduledQueryRules@2021-08-01' = { + name: alertName + location: resourceGroup().location + tags: { + _deployed_by_amba: 'true' + } + properties: { + description: alertDescription + severity: alertSeverity + enabled: isEnabled + scopes: [ + resourceId + ] + evaluationFrequency: evaluationFrequency + windowSize: windowSize + criteria: { + allOf: [ + { + query: query + metricMeasureColumn: metricMeasureColumn + resourceIdColumn: resourceIdColumn + dimensions: [ + { + name: 'UsedRatio' + operator: 'Include' + values: ['*'] + } + { + name: 'MDTBytesUsed' + operator: 'Include' + values: ['*'] + } + { + name: 'MDTBytesTotal' + operator: 'Include' + values: ['*'] + }] + operator: operator + threshold: threshold + timeAggregation: timeAggregation + failingPeriods: { + numberOfEvaluationPeriods: numberOfEvaluationPeriods + minFailingPeriodsToAlert: minFailingPeriodsToAlert + } + } + ] + } + muteActionsDuration: muteActionsDuration + autoMitigate: autoMitigate + checkWorkspaceAlertsStorageConfigured: checkWorkspaceAlertsStorageConfigured + } +} + +var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' +resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') { + name: ambaTelemetryPidName + tags: { + _deployed_by_amba: 'true' + } + properties: { + mode: 'Incremental' + template: { + '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#' + contentVersion: '1.0.0.0' + resources: [] + } + } +} diff --git a/services/StorageCache/AmlFilesystems/templates/bicep/MDTFilesAvailable_ecec6f93-af7e-4071-b35d-cd70b3f16581.bicep b/services/StorageCache/AmlFilesystems/templates/bicep/MDTFilesAvailable_ecec6f93-af7e-4071-b35d-cd70b3f16581.bicep new file mode 100644 index 000000000..41bb9b4f1 --- /dev/null +++ b/services/StorageCache/AmlFilesystems/templates/bicep/MDTFilesAvailable_ecec6f93-af7e-4071-b35d-cd70b3f16581.bicep @@ -0,0 +1,183 @@ +@description('Name of the alert') +@minLength(1) +param alertName string + +@description('Description of alert') +param alertDescription string = 'Log an alert if MDTBytesAvailable is below 15%' + +@description('Specifies whether the alert is enabled') +param isEnabled bool = true + +@description('Specifies whether to check linked storage and fail creation if the storage was not found') +param checkWorkspaceAlertsStorageConfigured bool = false + +@description('Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz') +@minLength(1) +param resourceId string + +@description('Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' +]) +param muteActionsDuration string + +@description('Severity of alert {0,1,2,3,4}') +@allowed([ + 0 + 1 + 2 + 3 + 4 +]) +param alertSeverity int = 2 + +@description('Specifies whether the alert will automatically resolve') +param autoMitigate bool = true + +@description('Name of the metric used in the comparison to activate the alert.') +@minLength(1) +param query string = ' let threshold_used = 0.15; AzureMetrics | where MetricName == "MDTBytesAvailable" or MetricName == "MDTBytesTotal" | summarize MDTBytesAvailable = maxif(Total, MetricName == "MDTBytesAvailable"), MDTBytesTotal = maxif(Total, MetricName == "MDTBytesTotal") | extend AvailableRatio = MDTBytesAvailable / MDTBytesTotal | where AvailableRatio < threshold_available | project AvailableRatio, MDTBytesAvailable, MDTBytesTotal ' + +@description('Name of the measure column used in the alert evaluation.') +param metricMeasureColumn string = 'AggregatedValue' + +@description('Name of the resource ID column used in the alert targeting the alerts.') +param resourceIdColumn string = '' + +@description('Operator comparing the current value with the threshold value.') +@allowed([ + 'Equals' + 'GreaterThan' + 'GreaterThanOrEqual' + 'LessThan' + 'LessThanOrEqual' +]) +param operator string = 'LessThan' + +@description('The threshold value at which the alert is activated.') +param threshold int = 15 + +@description('The number of periods to check in the alert evaluation.') +param numberOfEvaluationPeriods int = 1 + +@description('The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods).') +param minFailingPeriodsToAlert int = 1 + +@description('How the data that is collected should be combined over time.') +@allowed([ + 'Average' + 'Minimum' + 'Maximum' + 'Total' + 'Count' +]) +param timeAggregation string = 'Average' + +@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' + 'P1D' +]) +param windowSize string = 'PT1M' + +@description('how often the metric alert is evaluated represented in ISO 8601 duration format') +@allowed([ + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' +]) +param evaluationFrequency string = 'PT5M' + +@description('"The current date and time using the utcNow function. Used for deployment name uniqueness') +param currentDateTimeUtcNow string = utcNow() + +@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.') +@allowed([ + 'Yes' + 'No' +]) +param telemetryOptOut string = 'No' + +resource alert 'Microsoft.Insights/scheduledQueryRules@2021-08-01' = { + name: alertName + location: resourceGroup().location + tags: { + _deployed_by_amba: 'true' + } + properties: { + description: alertDescription + severity: alertSeverity + enabled: isEnabled + scopes: [ + resourceId + ] + evaluationFrequency: evaluationFrequency + windowSize: windowSize + criteria: { + allOf: [ + { + query: query + metricMeasureColumn: metricMeasureColumn + resourceIdColumn: resourceIdColumn + dimensions: [ + { + name: 'AvailableRatio' + operator: 'Include' + values: ['*'] + } + { + name: 'MDTBytesAvailable' + operator: 'Include' + values: ['*'] + } + { + name: 'MDTBytesTotal' + operator: 'Include' + values: ['*'] + }] + operator: operator + threshold: threshold + timeAggregation: timeAggregation + failingPeriods: { + numberOfEvaluationPeriods: numberOfEvaluationPeriods + minFailingPeriodsToAlert: minFailingPeriodsToAlert + } + } + ] + } + muteActionsDuration: muteActionsDuration + autoMitigate: autoMitigate + checkWorkspaceAlertsStorageConfigured: checkWorkspaceAlertsStorageConfigured + } +} + +var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' +resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') { + name: ambaTelemetryPidName + tags: { + _deployed_by_amba: 'true' + } + properties: { + mode: 'Incremental' + template: { + '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#' + contentVersion: '1.0.0.0' + resources: [] + } + } +} diff --git a/services/StorageCache/AmlFilesystems/templates/bicep/MDTFilesFree_2feba8fd-ff1e-4f48-bc01-6e2996edafa6.bicep b/services/StorageCache/AmlFilesystems/templates/bicep/MDTFilesFree_2feba8fd-ff1e-4f48-bc01-6e2996edafa6.bicep new file mode 100644 index 000000000..4822dbdb6 --- /dev/null +++ b/services/StorageCache/AmlFilesystems/templates/bicep/MDTFilesFree_2feba8fd-ff1e-4f48-bc01-6e2996edafa6.bicep @@ -0,0 +1,183 @@ +@description('Name of the alert') +@minLength(1) +param alertName string + +@description('Description of alert') +param alertDescription string = 'Log an alert if MDTFilesFree is below 15%' + +@description('Specifies whether the alert is enabled') +param isEnabled bool = true + +@description('Specifies whether to check linked storage and fail creation if the storage was not found') +param checkWorkspaceAlertsStorageConfigured bool = false + +@description('Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz') +@minLength(1) +param resourceId string + +@description('Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' +]) +param muteActionsDuration string + +@description('Severity of alert {0,1,2,3,4}') +@allowed([ + 0 + 1 + 2 + 3 + 4 +]) +param alertSeverity int = 2 + +@description('Specifies whether the alert will automatically resolve') +param autoMitigate bool = true + +@description('Name of the metric used in the comparison to activate the alert.') +@minLength(1) +param query string = ' let threshold_used = 0.15; AzureMetrics | where MetricName == "MDTFilesFree" or MetricName == "MDTFilesTotal" | summarize MDTFilesFree = maxif(Total, MetricName == "MDTFilesFree"), MDTFilesTotal = maxif(Total, MetricName == "MDTFilesTotal") | extend FreeRatio = MDTFilesFree / MDTFilesTotal | where FreeRatio < threshold_free | project FreeRatio, MDTFilesFree, MDTFilesTotal ' + +@description('Name of the measure column used in the alert evaluation.') +param metricMeasureColumn string = 'AggregatedValue' + +@description('Name of the resource ID column used in the alert targeting the alerts.') +param resourceIdColumn string = '' + +@description('Operator comparing the current value with the threshold value.') +@allowed([ + 'Equals' + 'GreaterThan' + 'GreaterThanOrEqual' + 'LessThan' + 'LessThanOrEqual' +]) +param operator string = 'LessThan' + +@description('The threshold value at which the alert is activated.') +param threshold int = 15 + +@description('The number of periods to check in the alert evaluation.') +param numberOfEvaluationPeriods int = 1 + +@description('The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods).') +param minFailingPeriodsToAlert int = 1 + +@description('How the data that is collected should be combined over time.') +@allowed([ + 'Average' + 'Minimum' + 'Maximum' + 'Total' + 'Count' +]) +param timeAggregation string = 'Average' + +@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' + 'P1D' +]) +param windowSize string = 'PT1M' + +@description('how often the metric alert is evaluated represented in ISO 8601 duration format') +@allowed([ + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' +]) +param evaluationFrequency string = 'PT5M' + +@description('"The current date and time using the utcNow function. Used for deployment name uniqueness') +param currentDateTimeUtcNow string = utcNow() + +@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.') +@allowed([ + 'Yes' + 'No' +]) +param telemetryOptOut string = 'No' + +resource alert 'Microsoft.Insights/scheduledQueryRules@2021-08-01' = { + name: alertName + location: resourceGroup().location + tags: { + _deployed_by_amba: 'true' + } + properties: { + description: alertDescription + severity: alertSeverity + enabled: isEnabled + scopes: [ + resourceId + ] + evaluationFrequency: evaluationFrequency + windowSize: windowSize + criteria: { + allOf: [ + { + query: query + metricMeasureColumn: metricMeasureColumn + resourceIdColumn: resourceIdColumn + dimensions: [ + { + name: 'FreeRatio' + operator: 'Include' + values: ['*'] + } + { + name: 'MDTFilesFree' + operator: 'Include' + values: ['*'] + } + { + name: 'MDTFilesTotal' + operator: 'Include' + values: ['*'] + }] + operator: operator + threshold: threshold + timeAggregation: timeAggregation + failingPeriods: { + numberOfEvaluationPeriods: numberOfEvaluationPeriods + minFailingPeriodsToAlert: minFailingPeriodsToAlert + } + } + ] + } + muteActionsDuration: muteActionsDuration + autoMitigate: autoMitigate + checkWorkspaceAlertsStorageConfigured: checkWorkspaceAlertsStorageConfigured + } +} + +var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' +resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') { + name: ambaTelemetryPidName + tags: { + _deployed_by_amba: 'true' + } + properties: { + mode: 'Incremental' + template: { + '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#' + contentVersion: '1.0.0.0' + resources: [] + } + } +} diff --git a/services/StorageCache/AmlFilesystems/templates/bicep/MDTFilesUsed_48fc094d-8a00-4d3c-86d3-3230c7e5881a.bicep b/services/StorageCache/AmlFilesystems/templates/bicep/MDTFilesUsed_48fc094d-8a00-4d3c-86d3-3230c7e5881a.bicep new file mode 100644 index 000000000..e42f3ac6c --- /dev/null +++ b/services/StorageCache/AmlFilesystems/templates/bicep/MDTFilesUsed_48fc094d-8a00-4d3c-86d3-3230c7e5881a.bicep @@ -0,0 +1,183 @@ +@description('Name of the alert') +@minLength(1) +param alertName string + +@description('Description of alert') +param alertDescription string = 'Log an alert if MDTFilesUsed is above 85%' + +@description('Specifies whether the alert is enabled') +param isEnabled bool = true + +@description('Specifies whether to check linked storage and fail creation if the storage was not found') +param checkWorkspaceAlertsStorageConfigured bool = false + +@description('Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz') +@minLength(1) +param resourceId string + +@description('Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' +]) +param muteActionsDuration string + +@description('Severity of alert {0,1,2,3,4}') +@allowed([ + 0 + 1 + 2 + 3 + 4 +]) +param alertSeverity int = 2 + +@description('Specifies whether the alert will automatically resolve') +param autoMitigate bool = true + +@description('Name of the metric used in the comparison to activate the alert.') +@minLength(1) +param query string = ' let threshold_used = 0.85; AzureMetrics | where MetricName == "MDTFilesTotal" or MetricName == "MDTFilesUsed" | summarize MDTFilesTotal = maxif(Total, MetricName == "MDTFilesTotal"), MDTFilesUsed = maxif(Total, MetricName == "MDTFilesUsed") | extend FreeRatio = MDTFilesFree / MDTFilesTotal | where UsedRatio > threshold_used | project UsedRatio, MDTFilesUsed, MDTFilesTotal ' + +@description('Name of the measure column used in the alert evaluation.') +param metricMeasureColumn string = 'AggregatedValue' + +@description('Name of the resource ID column used in the alert targeting the alerts.') +param resourceIdColumn string = '' + +@description('Operator comparing the current value with the threshold value.') +@allowed([ + 'Equals' + 'GreaterThan' + 'GreaterThanOrEqual' + 'LessThan' + 'LessThanOrEqual' +]) +param operator string = 'GreaterThan' + +@description('The threshold value at which the alert is activated.') +param threshold int = 85 + +@description('The number of periods to check in the alert evaluation.') +param numberOfEvaluationPeriods int = 1 + +@description('The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods).') +param minFailingPeriodsToAlert int = 1 + +@description('How the data that is collected should be combined over time.') +@allowed([ + 'Average' + 'Minimum' + 'Maximum' + 'Total' + 'Count' +]) +param timeAggregation string = 'Average' + +@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' + 'P1D' +]) +param windowSize string = 'PT1M' + +@description('how often the metric alert is evaluated represented in ISO 8601 duration format') +@allowed([ + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' +]) +param evaluationFrequency string = 'PT5M' + +@description('"The current date and time using the utcNow function. Used for deployment name uniqueness') +param currentDateTimeUtcNow string = utcNow() + +@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.') +@allowed([ + 'Yes' + 'No' +]) +param telemetryOptOut string = 'No' + +resource alert 'Microsoft.Insights/scheduledQueryRules@2021-08-01' = { + name: alertName + location: resourceGroup().location + tags: { + _deployed_by_amba: 'true' + } + properties: { + description: alertDescription + severity: alertSeverity + enabled: isEnabled + scopes: [ + resourceId + ] + evaluationFrequency: evaluationFrequency + windowSize: windowSize + criteria: { + allOf: [ + { + query: query + metricMeasureColumn: metricMeasureColumn + resourceIdColumn: resourceIdColumn + dimensions: [ + { + name: 'UsedRatio' + operator: 'Include' + values: ['*'] + } + { + name: 'MDTFilesUsed' + operator: 'Include' + values: ['*'] + } + { + name: 'MDTFilesTotal' + operator: 'Include' + values: ['*'] + }] + operator: operator + threshold: threshold + timeAggregation: timeAggregation + failingPeriods: { + numberOfEvaluationPeriods: numberOfEvaluationPeriods + minFailingPeriodsToAlert: minFailingPeriodsToAlert + } + } + ] + } + muteActionsDuration: muteActionsDuration + autoMitigate: autoMitigate + checkWorkspaceAlertsStorageConfigured: checkWorkspaceAlertsStorageConfigured + } +} + +var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' +resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') { + name: ambaTelemetryPidName + tags: { + _deployed_by_amba: 'true' + } + properties: { + mode: 'Incremental' + template: { + '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#' + contentVersion: '1.0.0.0' + resources: [] + } + } +} diff --git a/services/StorageCache/AmlFilesystems/templates/bicep/OSTBytesAvailable_4eeca790-a804-4453-b339-73ea425610bc.bicep b/services/StorageCache/AmlFilesystems/templates/bicep/OSTBytesAvailable_4eeca790-a804-4453-b339-73ea425610bc.bicep new file mode 100644 index 000000000..f54da1a33 --- /dev/null +++ b/services/StorageCache/AmlFilesystems/templates/bicep/OSTBytesAvailable_4eeca790-a804-4453-b339-73ea425610bc.bicep @@ -0,0 +1,183 @@ +@description('Name of the alert') +@minLength(1) +param alertName string + +@description('Description of alert') +param alertDescription string = 'Log an alert if OSTBytesAvailable is below 15%' + +@description('Specifies whether the alert is enabled') +param isEnabled bool = true + +@description('Specifies whether to check linked storage and fail creation if the storage was not found') +param checkWorkspaceAlertsStorageConfigured bool = false + +@description('Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz') +@minLength(1) +param resourceId string + +@description('Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' +]) +param muteActionsDuration string + +@description('Severity of alert {0,1,2,3,4}') +@allowed([ + 0 + 1 + 2 + 3 + 4 +]) +param alertSeverity int = 2 + +@description('Specifies whether the alert will automatically resolve') +param autoMitigate bool = true + +@description('Name of the metric used in the comparison to activate the alert.') +@minLength(1) +param query string = ' let threshold_free = 0.15; AzureMetrics | where MetricName == "OSTBytesAvailable" or MetricName == "OSTBytesTotal" | summarize OSTBytesAvailable = maxif(Total, MetricName == "OSTBytesAvailable"), OSTBytesTotal = maxif(Total, MetricName == "OSTBytesTotal") | extend AvailableRatio = OSTBytesAvailable / OSTBytesTotal | where AvailableRatio < threshold_available | project AvailableRatio, OSTBytesAvailable, OSTBytesTotal ' + +@description('Name of the measure column used in the alert evaluation.') +param metricMeasureColumn string = 'AggregatedValue' + +@description('Name of the resource ID column used in the alert targeting the alerts.') +param resourceIdColumn string = '' + +@description('Operator comparing the current value with the threshold value.') +@allowed([ + 'Equals' + 'GreaterThan' + 'GreaterThanOrEqual' + 'LessThan' + 'LessThanOrEqual' +]) +param operator string = 'LessThan' + +@description('The threshold value at which the alert is activated.') +param threshold int = 15 + +@description('The number of periods to check in the alert evaluation.') +param numberOfEvaluationPeriods int = 1 + +@description('The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods).') +param minFailingPeriodsToAlert int = 1 + +@description('How the data that is collected should be combined over time.') +@allowed([ + 'Average' + 'Minimum' + 'Maximum' + 'Total' + 'Count' +]) +param timeAggregation string = 'Average' + +@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' + 'P1D' +]) +param windowSize string = 'PT1M' + +@description('how often the metric alert is evaluated represented in ISO 8601 duration format') +@allowed([ + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' +]) +param evaluationFrequency string = 'PT5M' + +@description('"The current date and time using the utcNow function. Used for deployment name uniqueness') +param currentDateTimeUtcNow string = utcNow() + +@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.') +@allowed([ + 'Yes' + 'No' +]) +param telemetryOptOut string = 'No' + +resource alert 'Microsoft.Insights/scheduledQueryRules@2021-08-01' = { + name: alertName + location: resourceGroup().location + tags: { + _deployed_by_amba: 'true' + } + properties: { + description: alertDescription + severity: alertSeverity + enabled: isEnabled + scopes: [ + resourceId + ] + evaluationFrequency: evaluationFrequency + windowSize: windowSize + criteria: { + allOf: [ + { + query: query + metricMeasureColumn: metricMeasureColumn + resourceIdColumn: resourceIdColumn + dimensions: [ + { + name: 'AvailableRatio' + operator: 'Include' + values: ['*'] + } + { + name: 'OSTBytesAvailable' + operator: 'Include' + values: ['*'] + } + { + name: 'OSTBytesTotal' + operator: 'Include' + values: ['*'] + }] + operator: operator + threshold: threshold + timeAggregation: timeAggregation + failingPeriods: { + numberOfEvaluationPeriods: numberOfEvaluationPeriods + minFailingPeriodsToAlert: minFailingPeriodsToAlert + } + } + ] + } + muteActionsDuration: muteActionsDuration + autoMitigate: autoMitigate + checkWorkspaceAlertsStorageConfigured: checkWorkspaceAlertsStorageConfigured + } +} + +var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' +resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') { + name: ambaTelemetryPidName + tags: { + _deployed_by_amba: 'true' + } + properties: { + mode: 'Incremental' + template: { + '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#' + contentVersion: '1.0.0.0' + resources: [] + } + } +} diff --git a/services/StorageCache/AmlFilesystems/templates/bicep/OSTBytesUsed_59298086-ec77-4f47-b2ef-b853b79e31cb.bicep b/services/StorageCache/AmlFilesystems/templates/bicep/OSTBytesUsed_59298086-ec77-4f47-b2ef-b853b79e31cb.bicep new file mode 100644 index 000000000..405f71dd9 --- /dev/null +++ b/services/StorageCache/AmlFilesystems/templates/bicep/OSTBytesUsed_59298086-ec77-4f47-b2ef-b853b79e31cb.bicep @@ -0,0 +1,183 @@ +@description('Name of the alert') +@minLength(1) +param alertName string + +@description('Description of alert') +param alertDescription string = 'Log an alert if OSTByteUsed is above 85%' + +@description('Specifies whether the alert is enabled') +param isEnabled bool = true + +@description('Specifies whether to check linked storage and fail creation if the storage was not found') +param checkWorkspaceAlertsStorageConfigured bool = false + +@description('Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz') +@minLength(1) +param resourceId string + +@description('Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' +]) +param muteActionsDuration string + +@description('Severity of alert {0,1,2,3,4}') +@allowed([ + 0 + 1 + 2 + 3 + 4 +]) +param alertSeverity int = 2 + +@description('Specifies whether the alert will automatically resolve') +param autoMitigate bool = true + +@description('Name of the metric used in the comparison to activate the alert.') +@minLength(1) +param query string = ' let threshold_used = 0.85; AzureMetrics | where MetricName == "OSTBytesTotal" or MetricName == "OSTBytesUsed" | summarize OSTBytesTotal = maxif(Total, MetricName == "OSTBytesTotal"), OSTBytesUsed = maxif(Total, MetricName == "OSTBytesUsed") | extend UsedRatio = OSTBytesUsed / OSTBytesTotal | where UsedRatio > threshold_used | project UsedRatio, OSTBytesUsed, OSTBytesTotal ' + +@description('Name of the measure column used in the alert evaluation.') +param metricMeasureColumn string = 'AggregatedValue' + +@description('Name of the resource ID column used in the alert targeting the alerts.') +param resourceIdColumn string = '' + +@description('Operator comparing the current value with the threshold value.') +@allowed([ + 'Equals' + 'GreaterThan' + 'GreaterThanOrEqual' + 'LessThan' + 'LessThanOrEqual' +]) +param operator string = 'GreaterThan' + +@description('The threshold value at which the alert is activated.') +param threshold int = 85 + +@description('The number of periods to check in the alert evaluation.') +param numberOfEvaluationPeriods int = 1 + +@description('The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods).') +param minFailingPeriodsToAlert int = 1 + +@description('How the data that is collected should be combined over time.') +@allowed([ + 'Average' + 'Minimum' + 'Maximum' + 'Total' + 'Count' +]) +param timeAggregation string = 'Average' + +@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' + 'P1D' +]) +param windowSize string = 'PT1M' + +@description('how often the metric alert is evaluated represented in ISO 8601 duration format') +@allowed([ + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' +]) +param evaluationFrequency string = 'PT5M' + +@description('"The current date and time using the utcNow function. Used for deployment name uniqueness') +param currentDateTimeUtcNow string = utcNow() + +@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.') +@allowed([ + 'Yes' + 'No' +]) +param telemetryOptOut string = 'No' + +resource alert 'Microsoft.Insights/scheduledQueryRules@2021-08-01' = { + name: alertName + location: resourceGroup().location + tags: { + _deployed_by_amba: 'true' + } + properties: { + description: alertDescription + severity: alertSeverity + enabled: isEnabled + scopes: [ + resourceId + ] + evaluationFrequency: evaluationFrequency + windowSize: windowSize + criteria: { + allOf: [ + { + query: query + metricMeasureColumn: metricMeasureColumn + resourceIdColumn: resourceIdColumn + dimensions: [ + { + name: 'UsedRatio' + operator: 'Include' + values: ['*'] + } + { + name: 'OSTBytesUsed' + operator: 'Include' + values: ['*'] + } + { + name: 'OSTBytesTotal' + operator: 'Include' + values: ['*'] + }] + operator: operator + threshold: threshold + timeAggregation: timeAggregation + failingPeriods: { + numberOfEvaluationPeriods: numberOfEvaluationPeriods + minFailingPeriodsToAlert: minFailingPeriodsToAlert + } + } + ] + } + muteActionsDuration: muteActionsDuration + autoMitigate: autoMitigate + checkWorkspaceAlertsStorageConfigured: checkWorkspaceAlertsStorageConfigured + } +} + +var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' +resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') { + name: ambaTelemetryPidName + tags: { + _deployed_by_amba: 'true' + } + properties: { + mode: 'Incremental' + template: { + '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#' + contentVersion: '1.0.0.0' + resources: [] + } + } +} diff --git a/services/StorageCache/AmlFilesystems/templates/bicep/OSTFilesFree_8f231351-c123-4e4c-8631-9978e641a3ca.bicep b/services/StorageCache/AmlFilesystems/templates/bicep/OSTFilesFree_8f231351-c123-4e4c-8631-9978e641a3ca.bicep new file mode 100644 index 000000000..0bdac3e4a --- /dev/null +++ b/services/StorageCache/AmlFilesystems/templates/bicep/OSTFilesFree_8f231351-c123-4e4c-8631-9978e641a3ca.bicep @@ -0,0 +1,183 @@ +@description('Name of the alert') +@minLength(1) +param alertName string + +@description('Description of alert') +param alertDescription string = 'Log an alert if OSTFilesFree is below 15%' + +@description('Specifies whether the alert is enabled') +param isEnabled bool = true + +@description('Specifies whether to check linked storage and fail creation if the storage was not found') +param checkWorkspaceAlertsStorageConfigured bool = false + +@description('Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz') +@minLength(1) +param resourceId string + +@description('Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' +]) +param muteActionsDuration string + +@description('Severity of alert {0,1,2,3,4}') +@allowed([ + 0 + 1 + 2 + 3 + 4 +]) +param alertSeverity int = 2 + +@description('Specifies whether the alert will automatically resolve') +param autoMitigate bool = true + +@description('Name of the metric used in the comparison to activate the alert.') +@minLength(1) +param query string = ' let threshold_free = 0.15; AzureMetrics | where MetricName == "OSTFilesFree" or MetricName == "OSTFilesTotal" | summarize OSTFilesFree = maxif(Total, MetricName == "OSTFilesFree"), OSTFilesTotal = maxif(Total, MetricName == "OSTFilesTotal") | extend FreeRatio = OSTFilesFree / OSTFilesTotal | where FreeRatio < threshold_free | project FreeRatio, OSTFilesFree, OSTFilesTotal ' + +@description('Name of the measure column used in the alert evaluation.') +param metricMeasureColumn string = 'AggregatedValue' + +@description('Name of the resource ID column used in the alert targeting the alerts.') +param resourceIdColumn string = '' + +@description('Operator comparing the current value with the threshold value.') +@allowed([ + 'Equals' + 'GreaterThan' + 'GreaterThanOrEqual' + 'LessThan' + 'LessThanOrEqual' +]) +param operator string = 'LessThan' + +@description('The threshold value at which the alert is activated.') +param threshold int = 15 + +@description('The number of periods to check in the alert evaluation.') +param numberOfEvaluationPeriods int = 1 + +@description('The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods).') +param minFailingPeriodsToAlert int = 1 + +@description('How the data that is collected should be combined over time.') +@allowed([ + 'Average' + 'Minimum' + 'Maximum' + 'Total' + 'Count' +]) +param timeAggregation string = 'Average' + +@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' + 'P1D' +]) +param windowSize string = 'PT1M' + +@description('how often the metric alert is evaluated represented in ISO 8601 duration format') +@allowed([ + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' +]) +param evaluationFrequency string = 'PT5M' + +@description('"The current date and time using the utcNow function. Used for deployment name uniqueness') +param currentDateTimeUtcNow string = utcNow() + +@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.') +@allowed([ + 'Yes' + 'No' +]) +param telemetryOptOut string = 'No' + +resource alert 'Microsoft.Insights/scheduledQueryRules@2021-08-01' = { + name: alertName + location: resourceGroup().location + tags: { + _deployed_by_amba: 'true' + } + properties: { + description: alertDescription + severity: alertSeverity + enabled: isEnabled + scopes: [ + resourceId + ] + evaluationFrequency: evaluationFrequency + windowSize: windowSize + criteria: { + allOf: [ + { + query: query + metricMeasureColumn: metricMeasureColumn + resourceIdColumn: resourceIdColumn + dimensions: [ + { + name: 'FreeRatio' + operator: 'Include' + values: ['*'] + } + { + name: 'OSTFilesFree' + operator: 'Include' + values: ['*'] + } + { + name: 'OSTFilesTotal' + operator: 'Include' + values: ['*'] + }] + operator: operator + threshold: threshold + timeAggregation: timeAggregation + failingPeriods: { + numberOfEvaluationPeriods: numberOfEvaluationPeriods + minFailingPeriodsToAlert: minFailingPeriodsToAlert + } + } + ] + } + muteActionsDuration: muteActionsDuration + autoMitigate: autoMitigate + checkWorkspaceAlertsStorageConfigured: checkWorkspaceAlertsStorageConfigured + } +} + +var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' +resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') { + name: ambaTelemetryPidName + tags: { + _deployed_by_amba: 'true' + } + properties: { + mode: 'Incremental' + template: { + '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#' + contentVersion: '1.0.0.0' + resources: [] + } + } +} diff --git a/services/StorageCache/AmlFilesystems/templates/bicep/OSTFilesUsed_9d086772-1887-4893-8b9f-7e5169398bae.bicep b/services/StorageCache/AmlFilesystems/templates/bicep/OSTFilesUsed_9d086772-1887-4893-8b9f-7e5169398bae.bicep new file mode 100644 index 000000000..b9dc8e593 --- /dev/null +++ b/services/StorageCache/AmlFilesystems/templates/bicep/OSTFilesUsed_9d086772-1887-4893-8b9f-7e5169398bae.bicep @@ -0,0 +1,183 @@ +@description('Name of the alert') +@minLength(1) +param alertName string + +@description('Description of alert') +param alertDescription string = 'Log an alert if OSTFilesUsed is above 85%' + +@description('Specifies whether the alert is enabled') +param isEnabled bool = true + +@description('Specifies whether to check linked storage and fail creation if the storage was not found') +param checkWorkspaceAlertsStorageConfigured bool = false + +@description('Full Resource ID of the resource emitting the metric that will be used for the comparison. For example /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroups/ResourceGroupName/providers/Microsoft.compute/virtualMachines/VM_xyz') +@minLength(1) +param resourceId string + +@description('Mute actions for the chosen period of time (in ISO 8601 duration format) after the alert is fired.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' +]) +param muteActionsDuration string + +@description('Severity of alert {0,1,2,3,4}') +@allowed([ + 0 + 1 + 2 + 3 + 4 +]) +param alertSeverity int = 2 + +@description('Specifies whether the alert will automatically resolve') +param autoMitigate bool = true + +@description('Name of the metric used in the comparison to activate the alert.') +@minLength(1) +param query string = ' let threshold_used = 0.85; AzureMetrics | where MetricName == "OSTFilesTotal" or MetricName == "OSTFilesUsed" | summarize OSTFilesTotal = maxif(Total, MetricName == "OSTFilesTotal"), OSTFilesUsed = maxif(Total, MetricName == "OSTFilesUsed") | extend UsedRatio = OSTFilesUsed / OSTFilesTotal | where UsedRatio > threshold_used | project UsedRatio, OSTFilesUsed, OSTFilesTotal ' + +@description('Name of the measure column used in the alert evaluation.') +param metricMeasureColumn string = 'AggregatedValue' + +@description('Name of the resource ID column used in the alert targeting the alerts.') +param resourceIdColumn string = '' + +@description('Operator comparing the current value with the threshold value.') +@allowed([ + 'Equals' + 'GreaterThan' + 'GreaterThanOrEqual' + 'LessThan' + 'LessThanOrEqual' +]) +param operator string = 'GreaterThan' + +@description('The threshold value at which the alert is activated.') +param threshold int = 85 + +@description('The number of periods to check in the alert evaluation.') +param numberOfEvaluationPeriods int = 1 + +@description('The number of unhealthy periods to alert on (must be lower or equal to numberOfEvaluationPeriods).') +param minFailingPeriodsToAlert int = 1 + +@description('How the data that is collected should be combined over time.') +@allowed([ + 'Average' + 'Minimum' + 'Maximum' + 'Total' + 'Count' +]) +param timeAggregation string = 'Average' + +@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' + 'P1D' +]) +param windowSize string = 'PT1M' + +@description('how often the metric alert is evaluated represented in ISO 8601 duration format') +@allowed([ + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' +]) +param evaluationFrequency string = 'PT5M' + +@description('"The current date and time using the utcNow function. Used for deployment name uniqueness') +param currentDateTimeUtcNow string = utcNow() + +@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.') +@allowed([ + 'Yes' + 'No' +]) +param telemetryOptOut string = 'No' + +resource alert 'Microsoft.Insights/scheduledQueryRules@2021-08-01' = { + name: alertName + location: resourceGroup().location + tags: { + _deployed_by_amba: 'true' + } + properties: { + description: alertDescription + severity: alertSeverity + enabled: isEnabled + scopes: [ + resourceId + ] + evaluationFrequency: evaluationFrequency + windowSize: windowSize + criteria: { + allOf: [ + { + query: query + metricMeasureColumn: metricMeasureColumn + resourceIdColumn: resourceIdColumn + dimensions: [ + { + name: 'UsedRatio' + operator: 'Include' + values: ['*'] + } + { + name: 'OSTFilesUsed' + operator: 'Include' + values: ['*'] + } + { + name: 'OSTFilesTotal' + operator: 'Include' + values: ['*'] + }] + operator: operator + threshold: threshold + timeAggregation: timeAggregation + failingPeriods: { + numberOfEvaluationPeriods: numberOfEvaluationPeriods + minFailingPeriodsToAlert: minFailingPeriodsToAlert + } + } + ] + } + muteActionsDuration: muteActionsDuration + autoMitigate: autoMitigate + checkWorkspaceAlertsStorageConfigured: checkWorkspaceAlertsStorageConfigured + } +} + +var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' +resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') { + name: ambaTelemetryPidName + tags: { + _deployed_by_amba: 'true' + } + properties: { + mode: 'Incremental' + template: { + '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#' + contentVersion: '1.0.0.0' + resources: [] + } + } +} diff --git a/services/StorageCache/AmlFilesystems/templates/bicep/Uptime_7f951991-c6ce-4c72-9f55-7eade2c4f57c.bicep b/services/StorageCache/AmlFilesystems/templates/bicep/Uptime_7f951991-c6ce-4c72-9f55-7eade2c4f57c.bicep new file mode 100644 index 000000000..fd041082f --- /dev/null +++ b/services/StorageCache/AmlFilesystems/templates/bicep/Uptime_7f951991-c6ce-4c72-9f55-7eade2c4f57c.bicep @@ -0,0 +1,135 @@ +@description('Name of the alert') +@minLength(1) +param alertName string + +@description('Description of alert') +param alertDescription string = 'Total number of client input/output operations per second' + +@description('Array of Azure resource Ids. For example - /subscriptions/00000000-0000-0000-0000-0000-00000000/resourceGroup/resource-group-name/Microsoft.compute/virtualMachines/vm-name') +@minLength(1) +param targetResourceId array + +@description('Azure region in which target resources to be monitored are in (without spaces). For example: EastUS') +param targetResourceRegion string + +@description('Resource type of target resources to be monitored.') +@minLength(1) +param targetResourceType string + +@description('Specifies whether the alert is enabled') +param isEnabled bool = true + +@description('Severity of alert {0,1,2,3,4}') +@allowed([ + 0 + 1 + 2 + 3 + 4 +]) +param alertSeverity int = 1 + +@description('Operator comparing the current value with the threshold value.') +@allowed([ + 'Equals' + 'GreaterThan' + 'GreaterThanOrEqual' + 'LessThan' + 'LessThanOrEqual' +]) +param operator string = 'LessThan' + +@description('The threshold value at which the alert is activated.') +param threshold int = 99 + +@description('How the data that is collected should be combined over time.') +@allowed([ + 'Average' + 'Minimum' + 'Maximum' + 'Total' + 'Count' +]) +param timeAggregation string = 'Total' + +@description('Period of time used to monitor alert activity based on the threshold. Must be between one minute and one day. ISO 8601 duration format.') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' + 'PT6H' + 'PT12H' + 'PT24H' + 'P1D' +]) +param windowSize string = 'PT5M' + +@description('how often the metric alert is evaluated represented in ISO 8601 duration format') +@allowed([ + 'PT1M' + 'PT5M' + 'PT15M' + 'PT30M' + 'PT1H' +]) +param evaluationFrequency string = 'PT1M' + +@description('"The current date and time using the utcNow function. Used for deployment name uniqueness') +param currentDateTimeUtcNow string = utcNow() + +@description('The customer usage identifier used for telemetry purposes. The default value of False enables telemetry. The value of True disables telemetry.') +@allowed([ + 'Yes' + 'No' +]) +param telemetryOptOut string = 'No' + +resource metricAlert 'Microsoft.Insights/metricAlerts@2018-03-01' = { + name: alertName + location: 'global' + tags: { + _deployed_by_amba: 'true' + } + properties: { + description: alertDescription + scopes: targetResourceId + targetResourceType: targetResourceType + targetResourceRegion: targetResourceRegion + severity: alertSeverity + enabled: isEnabled + evaluationFrequency: evaluationFrequency + windowSize: windowSize + criteria: { + 'odata.type': 'Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria' + allOf: [ + { + name: '1st criterion' + metricName: 'Uptime' + dimensions: [[]] + operator: operator + threshold: threshold + timeAggregation: timeAggregation + criterionType: 'StaticThresholdCriterion' + } + ] + } + } +} + +var ambaTelemetryPidName = 'pid-8bb7cf8a-bcf7-4264-abcb-703ace2fc84d-${uniqueString(resourceGroup().id, alertName, currentDateTimeUtcNow)}' +resource ambaTelemetryPid 'Microsoft.Resources/deployments@2020-06-01' = if (telemetryOptOut == 'No') { + name: ambaTelemetryPidName + tags: { + _deployed_by_amba: 'true' + } + properties: { + mode: 'Incremental' + template: { + '$schema': 'https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#' + contentVersion: '1.0.0.0' + resources: [] + } + } +}