Merge pull request #11 from dasmeta/DMVP-2500

feat(DMVP-2500): Added a submodule for notifications.
dasmeta · Jul 18, 2023 · 73b59b4 · 73b59b4
2 parents 64e92ae + 9b58bf8
commit 73b59b4
Show file tree

Hide file tree

Showing 19 changed files with 341 additions and 7 deletions.
diff --git a/README.md b/README.md
@@ -88,7 +88,32 @@ module "grafana_contact_points" {
 }
 ```
 
-## Example for 2 submodules together
+## Example for Notifications
+```
+module "grafana_notifications" {
+  source  = "dasmeta/grafana/onpremise//modules/notifications"
+
+  notifications = {
+    contact_point   = "Slack"
+    group_by        = ["alertname"]
+    group_interval  = "10m"
+    repeat_interval = "1h"
+
+    policy = {
+      contact_point = "Opsgenie"
+      continue      = false
+
+      matcher = {
+        label = "priority"
+        match = "="
+        value = "P1"
+      }
+    }
+  }
+}
+```
+
+## Example for all submodules together
 ```
 module "grafana_alerts" {
   source  = "dasmeta/grafana/onpremise"
@@ -119,6 +144,7 @@ module "grafana_alerts" {
       threshold = 1
     }
   ]
+
   opsgenie_endpoints = [
     {
       name       = "Dev OpsGenie"
@@ -130,17 +156,36 @@ module "grafana_alerts" {
       api_key = "werARdsswefazgads12dad"
     }
   ]
+
   slack_endpoints = [
     {
       name        = "Dev Notifications"
       webhook_url = "https://hooks.slack.com/services/T6safsfFSF2352SFzdn"
     }
   ]
+
+  notifications = {
+    contact_point   = "Slack"
+    group_by        = ["alertname"]
+    group_interval  = "10m"
+    repeat_interval = "1h"
+
+    policy = {
+      contact_point = "Opsgenie"
+      continue      = false
+
+      matcher = {
+        label = "priority"
+        match = "="
+        value = "P1"
+      }
+    }
+  }
 }
 ```
 
 ## Usage
-Check `modules/alerts/tests` and `modules/contact-points/test` folders to see more examples.
+Check the `modules/alerts/tests`, `modules/contact-points/test` and `modules/notifications/tests` folders to see more examples.
 <!-- BEGINNING OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
 ## Requirements
 
@@ -156,6 +201,7 @@ No providers.
 |------|--------|---------|
 | <a name="module_alerts"></a> [alerts](#module\_alerts) | ./modules/alerts | n/a |
 | <a name="module_contact_points"></a> [contact\_points](#module\_contact\_points) | ./modules/contact-points | n/a |
+| <a name="module_notifications"></a> [notifications](#module\_notifications) | ./modules/notifications | n/a |
 
 ## Resources
 
@@ -166,7 +212,8 @@ No resources.
 | Name | Description | Type | Default | Required |
 |------|-------------|------|---------|:--------:|
 | <a name="input_alert_interval_seconds"></a> [alert\_interval\_seconds](#input\_alert\_interval\_seconds) | The interval, in seconds, at which all rules in the group are evaluated. If a group contains many rules, the rules are evaluated sequentially. | `number` | `10` | no |
-| <a name="input_alert_rules"></a> [alert\_rules](#input\_alert\_rules) | This varibale describes alert folders, groups and rules. | <pre>list(object({<br>    name            = string                          # The name of the alert rule<br>    summary         = optional(string, "")            # Rule annotation as a summary<br>    folder_name     = optional(string, "Main Alerts") # Grafana folder name in which the rule will be created<br>    datasource      = string                          # Name of the datasource used for the alert<br>    metric_name     = string                          # Prometheus metric name which queries the data for the alert<br>    metric_function = optional(string, "")            # Prometheus function used with metric for queries, like rate, sum etc.<br>    metric_interval = optional(string, "")            # The time interval with using functions like rate<br>    filters         = optional(any, {})               # Filters object to identify each service for alerting<br>    function        = optional(string, "mean")        # One of Reduce functions which will be used in B block for alerting<br>    equation        = string                          # The equation in the math expression which compares B blocks value with a number and generates an alert if needed. Possible values: gt, lt, gte, lte, e.<br>    threshold       = number                          # The value against which B blocks are compared in the math expression<br>  }))</pre> | `[]` | no |
+| <a name="input_alert_rules"></a> [alert\_rules](#input\_alert\_rules) | This variable describes alert folders, groups and rules. | <pre>list(object({<br>    name            = string                          # The name of the alert rule<br>    summary         = optional(string, "")            # Rule annotation as a summary<br>    priority        = optional(string, "P2")          # Rule priority level: P2 is for non-critical alerts, P1 will be set for critical alerts<br>    folder_name     = optional(string, "Main Alerts") # Grafana folder name in which the rule will be created<br>    datasource      = string                          # Name of the datasource used for the alert<br>    metric_name     = string                          # Prometheus metric name which queries the data for the alert<br>    metric_function = optional(string, "")            # Prometheus function used with metric for queries, like rate, sum etc.<br>    metric_interval = optional(string, "")            # The time interval with using functions like rate<br>    filters         = optional(any, {})               # Filters object to identify each service for alerting<br>    function        = optional(string, "mean")        # One of Reduce functions which will be used in B block for alerting<br>    equation        = string                          # The equation in the math expression which compares B blocks value with a number and generates an alert if needed. Possible values: gt, lt, gte, lte, e.<br>    threshold       = number                          # The value against which B blocks are compared in the math expression<br>  }))</pre> | `[]` | no |
+| <a name="input_notifications"></a> [notifications](#input\_notifications) | Represents the configuration options for Grafana notification policies. | <pre>object({<br>    contact_point   = optional(string, "Slack")                               # The default contact point to route all unmatched notifications to.<br>    group_by        = optional(list(string), ["grafana_folder", "alertname"]) # A list of alert labels to group alerts into notifications by.<br>    group_interval  = optional(string, "5m")                                  # Minimum time interval between two notifications for the same group.<br>    repeat_interval = optional(string, "4h")                                  # Minimum time interval for re-sending a notification if an alert is still firing.<br><br>    policy = optional(object({<br>      contact_point = optional(string, null) # The contact point to route notifications that match this rule to.<br>      continue      = optional(bool, false)  # Whether to continue matching subsequent rules if an alert matches the current rule. Otherwise, the rule will be 'consumed' by the first policy to match it.<br>      group_by      = optional(list(string), [])<br>      mute_timings  = optional(list(string), []) # A list of mute timing names to apply to alerts that match this policy.<br><br>      matcher = optional(object({<br>        label = optional(string, "priority") # The name of the label to match against.<br>        match = optional(string, "=")        # The operator to apply when matching values of the given label. Allowed operators are = for equality, != for negated equality, =~ for regex equality, and !~ for negated regex equality.<br>        value = optional(string, "P1")       # The label value to match against.<br>      }))<br>    }))<br>  })</pre> | `{}` | no |
 | <a name="input_opsgenie_endpoints"></a> [opsgenie\_endpoints](#input\_opsgenie\_endpoints) | OpsGenie contact points list. | <pre>list(object({<br>    name                    = string                                                 # The name of the contact point.<br>    api_key                 = string                                                 # The OpsGenie API key to use.<br>    auto_close              = optional(bool, false)                                  # Whether to auto-close alerts in OpsGenie when they resolve in the Alertmanager.<br>    message                 = optional(string, "")                                   # The templated content of the message.<br>    api_url                 = optional(string, "https://api.opsgenie.com/v2/alerts") # Allows customization of the OpsGenie API URL.<br>    disable_resolve_message = optional(bool, false)                                  # Whether to disable sending resolve messages.<br>  }))</pre> | `[]` | no |
 | <a name="input_slack_endpoints"></a> [slack\_endpoints](#input\_slack\_endpoints) | Slack contact points list. | <pre>list(object({<br>    name                    = string                                                     # The name of the contact point.<br>    endpoint_url            = optional(string, "https://slack.com/api/chat.postMessage") # Use this to override the Slack API endpoint URL to send requests to.<br>    icon_emoji              = optional(string, "")                                       # The name of a Slack workspace emoji to use as the bot icon.<br>    icon_url                = optional(string, "")                                       # A URL of an image to use as the bot icon.<br>    recipient               = optional(string, null)                                     # Channel, private group, or IM channel (can be an encoded ID or a name) to send messages to.<br>    text                    = optional(string, "")                                       # Templated content of the message.<br>    title                   = optional(string, "")                                       # Templated title of the message.<br>    token                   = optional(string, "")                                       # A Slack API token,for sending messages directly without the webhook method.<br>    webhook_url             = optional(string, "")                                       # A Slack webhook URL,for sending messages via the webhook method.<br>    username                = optional(string, "")                                       # Username for the bot to use.<br>    disable_resolve_message = optional(bool, false)                                      # Whether to disable sending resolve messages.<br>  }))</pre> | `[]` | no |
 

diff --git a/main.tf b/main.tf
@@ -11,3 +11,9 @@ module "contact_points" {
   slack_endpoints    = var.slack_endpoints
   opsgenie_endpoints = var.opsgenie_endpoints
 }
+
+module "notifications" {
+  source = "./modules/notifications"
+
+  notifications = var.notifications
+}
diff --git a/modules/alerts/README.md b/modules/alerts/README.md
@@ -12,6 +12,9 @@ Alert conditions are formed based on $B blocks and `equation`, `threshold` param
 
 And `threshold` parameter is the number value against which B blocks are compared in the math expression.
 
+## Priority
+Set an alert rule's priority with the `priority` field of each entry in the `alert_rules` variable. The default value is `P2`. For example, you can set the value to `P1` and configure a notification policy so that alerts with `P1` priority are sent to Opsgenie, while all other alerts are sent to Slack.
+
 <!-- BEGINNING OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
 ## Requirements
 
@@ -42,7 +45,7 @@ No modules.
 | Name | Description | Type | Default | Required |
 |------|-------------|------|---------|:--------:|
 | <a name="input_alert_interval_seconds"></a> [alert\_interval\_seconds](#input\_alert\_interval\_seconds) | The interval, in seconds, at which all rules in the group are evaluated. If a group contains many rules, the rules are evaluated sequentially. | `number` | `10` | no |
-| <a name="input_alert_rules"></a> [alert\_rules](#input\_alert\_rules) | This varibale describes alert folders, groups and rules. | <pre>list(object({<br>    name            = string                          # The name of the alert rule<br>    summary         = optional(string, "")            # Rule annotation as a summary<br>    folder_name     = optional(string, "Main Alerts") # Grafana folder name in which the rule will be created<br>    datasource      = string                          # Name of the datasource used for the alert<br>    metric_name     = string                          # Prometheus metric name which queries the data for the alert<br>    metric_function = optional(string, "")            # Prometheus function used with metric for queries, like rate, sum etc.<br>    metric_interval = optional(string, "")            # The time interval with using functions like rate<br>    filters         = optional(any, {})               # Filters object to identify each service for alerting<br>    function        = optional(string, "mean")        # One of Reduce functions which will be used in B block for alerting<br>    equation        = string                          # The equation in the math expression which compares B blocks value with a number and generates an alert if needed. Possible values: gt, lt, gte, lte, e<br>    threshold       = number                          # The value against which B blocks are compared in the math expression<br>  }))</pre> | `[]` | no |
+| <a name="input_alert_rules"></a> [alert\_rules](#input\_alert\_rules) | This variable describes alert folders, groups and rules. | <pre>list(object({<br>    name            = string                          # The name of the alert rule<br>    summary         = optional(string, "")            # Rule annotation as a summary<br>    priority        = optional(string, "P2")          # Rule priority level: P2 is for non-critical alerts, P1 will be set for critical alerts<br>    folder_name     = optional(string, "Main Alerts") # Grafana folder name in which the rule will be created<br>    datasource      = string                          # Name of the datasource used for the alert<br>    metric_name     = string                          # Prometheus metric name which queries the data for the alert<br>    metric_function = optional(string, "")            # Prometheus function used with metric for queries, like rate, sum etc.<br>    metric_interval = optional(string, "")            # The time interval with using functions like rate<br>    filters         = optional(any, {})               # Filters object to identify each service for alerting<br>    function        = optional(string, "mean")        # One of Reduce functions which will be used in B block for alerting<br>    equation        = string                          # The equation in the math expression which compares B blocks value with a number and generates an alert if needed. Possible values: gt, lt, gte, lte, e<br>    threshold       = number                          # The value against which B blocks are compared in the math expression<br>  }))</pre> | `[]` | no |
 
 ## Outputs
 

diff --git a/modules/alerts/main.tf b/modules/alerts/main.tf
@@ -34,6 +34,9 @@ resource "grafana_rule_group" "alert_rule" {
         "Managed By" = "Terraform"
         "Summary"    = lookup(rule.value, "summary", rule.value.name)
       }
+      labels = {
+        "priority" = lookup(rule.value, "priority", "P2")
+      }
       is_paused = false
       data {
         ref_id     = "A"

diff --git a/modules/alerts/tests/node-autoscaling/1-example.tf b/modules/alerts/tests/node-autoscaling/1-example.tf
@@ -5,7 +5,8 @@ module "this" {
     {
       name            = "Maximum node utilization in cluster"
       summary         = "Cluster is using 8 available nodes"
-      folder_name     = "Node Autoscaling"
+      folder_name     = "Test"
+      priority        = "P1"
       datasource      = "prometheus"
       filters         = null
       metric_name     = "kube_node_info"
@@ -17,7 +18,7 @@ module "this" {
     {
       name        = "High node utilization in cluster"
       summary     = "Cluster is using 6 of the available 8 nodes"
-      folder_name = "Node Autoscaling"
+      folder_name = "Test"
       datasource  = "prometheus"
       filters     = null
       metric_name = "kube_node_info"
@@ -28,9 +29,10 @@ module "this" {
     {
       name            = "Insufficient nodes in cluster"
       summary         = "Cluster is using fewer nodes than the required count"
-      folder_name     = "Node Autoscaling"
+      folder_name     = "Test"
       datasource      = "prometheus"
       filters         = null
+      metric_name     = "kube_node_info"
       metric_function = "sum"
       function        = "mean"
       equation        = "lt"

diff --git a/modules/alerts/variables.tf b/modules/alerts/variables.tf
@@ -8,6 +8,7 @@ variable "alert_rules" {
   type = list(object({
     name            = string                          # The name of the alert rule
     summary         = optional(string, "")            # Rule annotation as a summary
+    priority        = optional(string, "P2")          # Rule priority level: P2 is for non-critical alerts, P1 will be set for critical alerts
     folder_name     = optional(string, "Main Alerts") # Grafana folder name in which the rule will be created
     datasource      = string                          # Name of the datasource used for the alert
     metric_name     = string                          # Prometheus metric name which queries the data for the alert

diff --git a/modules/notifications/README.md b/modules/notifications/README.md
@@ -0,0 +1,40 @@
+## Usage
+This Terraform module helps you manage Grafana notification policies, making it easier to configure alert notifications for different contact points and conditions.
+
+Notification policies can be created for various contact points. Additionally, you can have nested policies.
+
+Please refer to the `tests` folder for real examples.
+<!-- BEGINNING OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
+## Requirements
+
+| Name | Version |
+|------|---------|
+| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.3.0 |
+| <a name="requirement_grafana"></a> [grafana](#requirement\_grafana) | >= 1.40.0 |
+
+## Providers
+
+| Name | Version |
+|------|---------|
+| <a name="provider_grafana"></a> [grafana](#provider\_grafana) | >= 1.40.0 |
+
+## Modules
+
+No modules.
+
+## Resources
+
+| Name | Type |
+|------|------|
+| [grafana_notification_policy.policy](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/notification_policy) | resource |
+
+## Inputs
+
+| Name | Description | Type | Default | Required |
+|------|-------------|------|---------|:--------:|
+| <a name="input_notifications"></a> [notifications](#input\_notifications) | Represents the configuration options for Grafana notification policies. | <pre>object({<br>    contact_point   = optional(string, "Slack")                               # The default contact point to route all unmatched notifications to.<br>    group_by        = optional(list(string), ["grafana_folder", "alertname"]) # A list of alert labels to group alerts into notifications by.<br>    group_interval  = optional(string, "5m")                                  # Minimum time interval between two notifications for the same group.<br>    repeat_interval = optional(string, "4h")                                  # Minimum time interval for re-sending a notification if an alert is still firing.<br><br>    policy = optional(object({<br>      contact_point = optional(string, null) # The contact point to route notifications that match this rule to.<br>      continue      = optional(bool, false)  # Whether to continue matching subsequent rules if an alert matches the current rule. Otherwise, the rule will be 'consumed' by the first policy to match it.<br>      group_by      = optional(list(string), [])<br>      mute_timings  = optional(list(string), []) # A list of mute timing names to apply to alerts that match this policy.<br><br>      matcher = optional(object({<br>        label = optional(string, "priority") # The name of the label to match against.<br>        match = optional(string, "=")        # The operator to apply when matching values of the given label. Allowed operators are = for equality, != for negated equality, =~ for regex equality, and !~ for negated regex equality.<br>        value = optional(string, "P1")       # The label value to match against.<br>      }))<br>    }))<br>  })</pre> | `{}` | no |
+
+## Outputs
+
+No outputs.
+<!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK -->