From 7b67965fb9d175a4e067ae4a2f48a095dd600576 Mon Sep 17 00:00:00 2001 From: Aram Karapetyan Date: Tue, 12 Dec 2023 18:08:31 +0400 Subject: [PATCH] fix(DMVP-3094): Adot log retention --- README.md | 6 +-- examples/spot-instance/README.md | 2 +- modules/adot/README.md | 5 ++- modules/adot/locals.tf | 10 +++++ modules/adot/main.tf | 1 + modules/adot/role.tf | 41 ++++++++++++++++++++- modules/adot/templates/adot-values.yaml.tpl | 2 + modules/adot/variables.tf | 17 +++++---- variables.tf | 18 ++++++--- 9 files changed, 82 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index dd8ec80..69a6071 100644 --- a/README.md +++ b/README.md @@ -235,14 +235,14 @@ worker_groups = { | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [account\_id](#input\_account\_id) | AWS Account Id to apply changes into | `string` | `null` | no | -| [adot\_config](#input\_adot\_config) | n/a | `any` |
{
"accept_namespace_regex": "(default|kube-system)",
"additional_metrics": {},
"log_group_name": "adot_log_group"
}
| no | +| [adot\_config](#input\_adot\_config) | Adot configs |
object({
accept_namespace_regex = optional(string, "(default|kube-system)")
additional_metrics = optional(list(string), [])
log_group_name = optional(string, "adot")
log_retention = optional(number, 14)
helm_values = optional(any, null)
})
|
{
"accept_namespace_regex": "(default|kube-system)",
"additional_metrics": [],
"log_group_name": "adot",
"log_retention": 14
}
| no | | [adot\_version](#input\_adot\_version) | The version of the AWS Distro for OpenTelemetry addon to use. | `string` | `"v0.78.0-eksbuild.1"` | no | | [alarms](#input\_alarms) | Alarms are enabled by default; you need to set the SNS topic name to send alarms to. To customize alarm thresholds, use custom\_values |
object({
enabled = optional(bool, true)
sns_topic = string
custom_values = optional(any, {})
})
| n/a | yes | | [alb\_log\_bucket\_name](#input\_alb\_log\_bucket\_name) | n/a | `string` | `""` | no | | [alb\_log\_bucket\_path](#input\_alb\_log\_bucket\_path) | ALB-INGRESS-CONTROLLER | `string` | `""` | no | | [api\_gateway\_resources](#input\_api\_gateway\_resources) | Nested map containing API, Stage, and VPC Link resources |
list(object({
namespace = string
api = object({
name = string
protocolType = string
})
stages = optional(list(object({
name = string
namespace = string
apiRef_name = string
stageName = string
autoDeploy = bool
description = string
})))
vpc_links = optional(list(object({
name = string
namespace = string
})))
}))
| `[]` | no | | [api\_gw\_deploy\_region](#input\_api\_gw\_deploy\_region) | Region in which API gateway will be configured | `string` | `""` | no | -| [autoscaler\_image\_patch](#input\_autoscaler\_image\_patch) | The patch number of autoscaler image | `number` | `1` | no | +| [autoscaler\_image\_patch](#input\_autoscaler\_image\_patch) | The patch number of autoscaler image | `number` | `0` | no | | [autoscaler\_limits](#input\_autoscaler\_limits) | n/a |
object({
cpu = string
memory = string
})
|
{
"cpu": "100m",
"memory": "600Mi"
}
| no | | [autoscaler\_requests](#input\_autoscaler\_requests) | n/a |
object({
cpu = string
memory = string
})
|
{
"cpu": "100m",
"memory": "600Mi"
}
| no | | [autoscaling](#input\_autoscaling) | Whether to enable autoscaling in EKS | `bool` | `true` | no | @@ -269,7 +269,7 @@ worker_groups = { | [fluent\_bit\_configs](#input\_fluent\_bit\_configs) | Fluent Bit configs |
object({
fluent_bit_name = optional(string, "")
log_group_name = optional(string, "")
system_log_group_name = optional(string, "")
log_retention_days = optional(number, 90)
values_yaml = optional(string, "")
configs = optional(object({
inputs = optional(string, "")
filters = optional(string, "")
outputs = optional(string, "")
}), {})
drop_namespaces = optional(list(string), [])
log_filters = optional(list(string), [])
additional_log_filters = optional(list(string), [])
kube_namespaces = optional(list(string), [])
})
|
{
"additional_log_filters": [
"ELB-HealthChecker",
"Amazon-Route53-Health-Check-Service"
],
"configs": {
"filters": "",
"inputs": "",
"outputs": ""
},
"drop_namespaces": [
"kube-system",
"opentelemetry-operator-system",
"adot",
"cert-manager",
"opentelemetry.*",
"meta.*"
],
"fluent_bit_name": "",
"kube_namespaces": [
"kube.*",
"meta.*",
"adot.*",
"devops.*",
"cert-manager.*",
"git.*",
"opentelemetry.*",
"stakater.*",
"renovate.*"
],
"log_filters": [
"kube-probe",
"health",
"prometheus",
"liveness"
],
"log_group_name": "",
"log_retention_days": 90,
"system_log_group_name": "",
"values_yaml": ""
}
| no | | [manage\_aws\_auth](#input\_manage\_aws\_auth) | n/a | `bool` | `true` | no | | [map\_roles](#input\_map\_roles) | Additional IAM roles to add to the aws-auth configmap. |
list(object({
rolearn = string
username = string
groups = list(string)
}))
| `[]` | no | -| [metrics\_exporter](#input\_metrics\_exporter) | Metrics Exporter, can use cloudwatch or adot | `string` | `"cloudwatch"` | no | +| [metrics\_exporter](#input\_metrics\_exporter) | Metrics Exporter, can use cloudwatch or adot | `string` | `"adot"` | no | | [metrics\_server\_name](#input\_metrics\_server\_name) | n/a | `string` | `"metrics-server"` | no | | [node\_groups](#input\_node\_groups) | Map of EKS managed node group definitions to create | `any` |
{
"default": {
"desired_size": 2,
"iam_role_additional_policies": [
"arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy"
],
"instance_types": [
"t3.medium"
],
"max_size": 4,
"min_size": 2
}
}
| no | | [node\_groups\_default](#input\_node\_groups\_default) | Map of EKS managed node group default configurations | `any` |
{
"disk_size": 50,
"iam_role_additional_policies": [
"arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy"
],
"instance_types": [
"t3.medium"
]
}
| no | diff --git a/examples/spot-instance/README.md b/examples/spot-instance/README.md index 41252e4..0bb326b 100644 --- a/examples/spot-instance/README.md +++ b/examples/spot-instance/README.md @@ -12,7 +12,7 @@ | Name | Version | |------|---------| -| [aws](#provider\_aws) | >= 3.41 | +| [aws](#provider\_aws) | 4.67.0 | ## Modules diff --git a/modules/adot/README.md b/modules/adot/README.md index adcca46..7fb61d3 100644 --- a/modules/adot/README.md +++ b/modules/adot/README.md @@ -58,6 +58,7 @@ No modules. | Name | Type | |------|------| | [aws_eks_addon.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/eks_addon) | resource | +| [aws_iam_policy.adot](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | | [aws_iam_role.adot_collector](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | | [aws_iam_role_policy_attachment.CloudWatchAgentServerPolicy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | | [helm_release.adot-collector](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | @@ -73,8 +74,8 @@ No modules. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [adot\_collector\_policy\_arns](#input\_adot\_collector\_policy\_arns) | List of IAM policy ARNs to attach to the ADOT collector service account. | `list(string)` |
[
"arn:aws:iam::aws:policy/AmazonPrometheusRemoteWriteAccess",
"arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy",
"arn:aws:iam::aws:policy/AWSXrayWriteOnlyAccess"
]
| no | -| [adot\_config](#input\_adot\_config) | accept\_namespace\_regex defines the list of namespaces from which metrics will be exported, and additional\_metrics defines additional metrics to export. | `any` |
{
"accept_namespace_regex": "(default|kube-system)",
"additional_metrics": [],
"helm_values": null,
"log_group_name": "adot_log_group"
}
| no | +| [adot\_collector\_policy\_arns](#input\_adot\_collector\_policy\_arns) | List of IAM policy ARNs to attach to the ADOT collector service account. | `list(string)` | `[]` | no | +| [adot\_config](#input\_adot\_config) | accept\_namespace\_regex defines the list of namespaces from which metrics will be exported, and additional\_metrics defines additional metrics to export. |
object({
accept_namespace_regex = optional(string, "(default|kube-system)")
additional_metrics = optional(list(string), [])
log_group_name = optional(string, "adot")
log_retention = optional(number, 14)
helm_values = optional(any, null)
})
|
{
"accept_namespace_regex": "(default|kube-system)",
"additional_metrics": [],
"helm_values": null,
"log_group_name": "adot",
"log_retention": 21
}
| no | | [adot\_log\_group\_name](#input\_adot\_log\_group\_name) | ADOT log group name | `string` | `"adot_log_group_name"` | no | | [adot\_version](#input\_adot\_version) | The version of the AWS Distro for OpenTelemetry addon to use. | `string` | `"v0.78.0-eksbuild.1"` | no | | [cluster\_name](#input\_cluster\_name) | K8s cluster name. | `string` | n/a | yes | diff --git a/modules/adot/locals.tf b/modules/adot/locals.tf index b326f93..e095c54 100644 --- a/modules/adot/locals.tf +++ b/modules/adot/locals.tf @@ -27,4 +27,14 @@ locals { merged_metrics = concat(local.default_metrics, lookup(var.adot_config, "additional_metrics", [])) merged_namespace_specific = concat(local.default_metrics_namespace_specific, lookup(var.adot_config, "namespace_specific_metrics", [])) + + + adot_policies = concat([ + "${aws_iam_policy.adot.arn}", + "arn:aws:iam::aws:policy/AmazonPrometheusRemoteWriteAccess", + "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy", + "arn:aws:iam::aws:policy/AWSXrayWriteOnlyAccess" + ], var.adot_collector_policy_arns) + + } diff --git a/modules/adot/main.tf b/modules/adot/main.tf index 759d536..a1a23f3 100644 --- a/modules/adot/main.tf +++ b/modules/adot/main.tf @@ -27,6 +27,7 @@ resource "helm_release" "adot-collector" { cluster_name = var.cluster_name accept_namespace_regex = var.adot_config.accept_namespace_regex log_group_name = var.adot_config.log_group_name + log_retention = var.adot_config.log_retention metrics = local.merged_metrics metrics_namespace_specific = local.merged_namespace_specific prometheus_metrics = var.prometheus_metrics diff --git a/modules/adot/role.tf b/modules/adot/role.tf index 9622f06..5c07940 100644 --- a/modules/adot/role.tf +++ b/modules/adot/role.tf @@ -30,8 +30,45 @@ resource "aws_iam_role" "adot_collector" { POLICY } +resource "aws_iam_policy" "adot" { + name = "adot_policy" + path = "/" + description = "Adot Policy" + + # Terraform's "jsonencode" function converts a + # Terraform expression result to valid JSON 
syntax. + policy = jsonencode({ + "Version" : "2012-10-17", + "Statement" : [ + { + "Effect" : "Allow", + "Action" : [ + "logs:PutLogEvents", + "logs:CreateLogGroup", + "logs:CreateLogStream", + "logs:DescribeLogStreams", + "logs:DescribeLogGroups", + "logs:PutRetentionPolicy", + "xray:PutTraceSegments", + "xray:PutTelemetryRecords", + "xray:GetSamplingRules", + "xray:GetSamplingTargets", + "xray:GetSamplingStatisticSummaries", + "ssm:GetParameters" + ], + "Resource" : "*" + } + ] + }) +} + resource "aws_iam_role_policy_attachment" "CloudWatchAgentServerPolicy" { - for_each = toset(var.adot_collector_policy_arns) - policy_arn = each.key + count = length(local.adot_policies) + + policy_arn = local.adot_policies[count.index] role = aws_iam_role.adot_collector.name + + depends_on = [ + aws_iam_policy.adot + ] } diff --git a/modules/adot/templates/adot-values.yaml.tpl b/modules/adot/templates/adot-values.yaml.tpl index 29faa76..cf35bb0 100644 --- a/modules/adot/templates/adot-values.yaml.tpl +++ b/modules/adot/templates/adot-values.yaml.tpl @@ -128,6 +128,7 @@ adotCollector: dimension_rollup_option: NoDimensionRollup log_group_name: "${log_group_name}" log_stream_name: "adot-metrics-prometheus" + log_retention: "${log_retention}" metric_declarations: - dimensions: - - Namespace @@ -150,6 +151,7 @@ adotCollector: namespace: "ContainerInsights" log_group_name: "${log_group_name}" log_stream_name: "adot-metrics" + log_retention: "${log_retention}" region: "${region}" dimension_rollup_option: "NoDimensionRollup" resource_to_telemetry_conversion: diff --git a/modules/adot/variables.tf b/modules/adot/variables.tf index 6c65dd3..9a154bb 100644 --- a/modules/adot/variables.tf +++ b/modules/adot/variables.tf @@ -34,20 +34,23 @@ variable "create_namespace" { variable "adot_collector_policy_arns" { description = "List of IAM policy ARNs to attach to the ADOT collector service account." 
type = list(string) - default = [ - "arn:aws:iam::aws:policy/AmazonPrometheusRemoteWriteAccess", - "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy", - "arn:aws:iam::aws:policy/AWSXrayWriteOnlyAccess" - ] + default = [] } variable "adot_config" { description = "accept_namespace_regex defines the list of namespaces from which metrics will be exported, and additional_metrics defines additional metrics to export." - type = any + type = object({ + accept_namespace_regex = optional(string, "(default|kube-system)") + additional_metrics = optional(list(string), []) + log_group_name = optional(string, "adot") + log_retention = optional(number, 14) + helm_values = optional(any, null) + }) default = { accept_namespace_regex = "(default|kube-system)" additional_metrics = [] - log_group_name = "adot_log_group" + log_group_name = "adot" + log_retention = 21 # ADOT helm chart values.yaml, if you don't use variable adot will be deployed with module default values file helm_values = null } diff --git a/variables.tf b/variables.tf index f17d78a..f192ca0 100644 --- a/variables.tf +++ b/variables.tf @@ -306,16 +306,24 @@ variable "vpc" { variable "metrics_exporter" { type = string - default = "cloudwatch" + default = "adot" description = "Metrics Exporter, can use cloudwatch or adot" } variable "adot_config" { - type = any + type = object({ + accept_namespace_regex = optional(string, "(default|kube-system)") + additional_metrics = optional(list(string), []) + log_group_name = optional(string, "adot") + log_retention = optional(number, 14) + helm_values = optional(any, null) + }) + description = "Adot configs" default = { accept_namespace_regex = "(default|kube-system)" - additional_metrics = {} - log_group_name = "adot_log_group" + additional_metrics = [] + log_group_name = "adot" + log_retention = 14 } } @@ -360,7 +368,7 @@ variable "autoscaling" { variable "autoscaler_image_patch" { type = number description = "The patch number of autoscaler image" - default = 1 + default = 0 } 
variable "scale_down_unneeded_time" {