Skip to content

Commit

Permalink
fix(DMVP-3094): Adot log retention
Browse files Browse the repository at this point in the history
  • Loading branch information
aramkarapetian committed Dec 12, 2023
1 parent 2a8ff61 commit 7b67965
Show file tree
Hide file tree
Showing 9 changed files with 82 additions and 20 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -235,14 +235,14 @@ worker_groups = {
| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| <a name="input_account_id"></a> [account\_id](#input\_account\_id) | AWS Account Id to apply changes into | `string` | `null` | no |
| <a name="input_adot_config"></a> [adot\_config](#input\_adot\_config) | n/a | `any` | <pre>{<br> "accept_namespace_regex": "(default|kube-system)",<br> "additional_metrics": {},<br> "log_group_name": "adot_log_group"<br>}</pre> | no |
| <a name="input_adot_config"></a> [adot\_config](#input\_adot\_config) | Adot configs | <pre>object({<br> accept_namespace_regex = optional(string, "(default|kube-system)")<br> additional_metrics = optional(list(string), [])<br> log_group_name = optional(string, "adot")<br> log_retention = optional(number, 14)<br> helm_values = optional(any, null)<br> })</pre> | <pre>{<br> "accept_namespace_regex": "(default|kube-system)",<br> "additional_metrics": [],<br> "log_group_name": "adot",<br> "log_retention": 14<br>}</pre> | no |
| <a name="input_adot_version"></a> [adot\_version](#input\_adot\_version) | The version of the AWS Distro for OpenTelemetry addon to use. | `string` | `"v0.78.0-eksbuild.1"` | no |
| <a name="input_alarms"></a> [alarms](#input\_alarms) | Alarms are enabled by default. Set sns\_topic to the name of the SNS topic that receives alarm notifications; to customize alarm thresholds, use custom\_values | <pre>object({<br> enabled = optional(bool, true)<br> sns_topic = string<br> custom_values = optional(any, {})<br> })</pre> | n/a | yes |
| <a name="input_alb_log_bucket_name"></a> [alb\_log\_bucket\_name](#input\_alb\_log\_bucket\_name) | n/a | `string` | `""` | no |
| <a name="input_alb_log_bucket_path"></a> [alb\_log\_bucket\_path](#input\_alb\_log\_bucket\_path) | ALB-INGRESS-CONTROLLER | `string` | `""` | no |
| <a name="input_api_gateway_resources"></a> [api\_gateway\_resources](#input\_api\_gateway\_resources) | Nested map containing API, Stage, and VPC Link resources | <pre>list(object({<br> namespace = string<br> api = object({<br> name = string<br> protocolType = string<br> })<br> stages = optional(list(object({<br> name = string<br> namespace = string<br> apiRef_name = string<br> stageName = string<br> autoDeploy = bool<br> description = string<br> })))<br> vpc_links = optional(list(object({<br> name = string<br> namespace = string<br> })))<br> }))</pre> | `[]` | no |
| <a name="input_api_gw_deploy_region"></a> [api\_gw\_deploy\_region](#input\_api\_gw\_deploy\_region) | Region in which API gateway will be configured | `string` | `""` | no |
| <a name="input_autoscaler_image_patch"></a> [autoscaler\_image\_patch](#input\_autoscaler\_image\_patch) | The patch number of autoscaler image | `number` | `1` | no |
| <a name="input_autoscaler_image_patch"></a> [autoscaler\_image\_patch](#input\_autoscaler\_image\_patch) | The patch number of autoscaler image | `number` | `0` | no |
| <a name="input_autoscaler_limits"></a> [autoscaler\_limits](#input\_autoscaler\_limits) | n/a | <pre>object({<br> cpu = string<br> memory = string<br> })</pre> | <pre>{<br> "cpu": "100m",<br> "memory": "600Mi"<br>}</pre> | no |
| <a name="input_autoscaler_requests"></a> [autoscaler\_requests](#input\_autoscaler\_requests) | n/a | <pre>object({<br> cpu = string<br> memory = string<br> })</pre> | <pre>{<br> "cpu": "100m",<br> "memory": "600Mi"<br>}</pre> | no |
| <a name="input_autoscaling"></a> [autoscaling](#input\_autoscaling) | Whether to enable autoscaling in EKS | `bool` | `true` | no |
Expand All @@ -269,7 +269,7 @@ worker_groups = {
| <a name="input_fluent_bit_configs"></a> [fluent\_bit\_configs](#input\_fluent\_bit\_configs) | Fluent Bit configs | <pre>object({<br> fluent_bit_name = optional(string, "")<br> log_group_name = optional(string, "")<br> system_log_group_name = optional(string, "")<br> log_retention_days = optional(number, 90)<br> values_yaml = optional(string, "")<br> configs = optional(object({<br> inputs = optional(string, "")<br> filters = optional(string, "")<br> outputs = optional(string, "")<br> }), {})<br> drop_namespaces = optional(list(string), [])<br> log_filters = optional(list(string), [])<br> additional_log_filters = optional(list(string), [])<br> kube_namespaces = optional(list(string), [])<br> })</pre> | <pre>{<br> "additional_log_filters": [<br> "ELB-HealthChecker",<br> "Amazon-Route53-Health-Check-Service"<br> ],<br> "configs": {<br> "filters": "",<br> "inputs": "",<br> "outputs": ""<br> },<br> "drop_namespaces": [<br> "kube-system",<br> "opentelemetry-operator-system",<br> "adot",<br> "cert-manager",<br> "opentelemetry.*",<br> "meta.*"<br> ],<br> "fluent_bit_name": "",<br> "kube_namespaces": [<br> "kube.*",<br> "meta.*",<br> "adot.*",<br> "devops.*",<br> "cert-manager.*",<br> "git.*",<br> "opentelemetry.*",<br> "stakater.*",<br> "renovate.*"<br> ],<br> "log_filters": [<br> "kube-probe",<br> "health",<br> "prometheus",<br> "liveness"<br> ],<br> "log_group_name": "",<br> "log_retention_days": 90,<br> "system_log_group_name": "",<br> "values_yaml": ""<br>}</pre> | no |
| <a name="input_manage_aws_auth"></a> [manage\_aws\_auth](#input\_manage\_aws\_auth) | n/a | `bool` | `true` | no |
| <a name="input_map_roles"></a> [map\_roles](#input\_map\_roles) | Additional IAM roles to add to the aws-auth configmap. | <pre>list(object({<br> rolearn = string<br> username = string<br> groups = list(string)<br> }))</pre> | `[]` | no |
| <a name="input_metrics_exporter"></a> [metrics\_exporter](#input\_metrics\_exporter) | Metrics Exporter, can use cloudwatch or adot | `string` | `"cloudwatch"` | no |
| <a name="input_metrics_exporter"></a> [metrics\_exporter](#input\_metrics\_exporter) | Metrics Exporter, can use cloudwatch or adot | `string` | `"adot"` | no |
| <a name="input_metrics_server_name"></a> [metrics\_server\_name](#input\_metrics\_server\_name) | n/a | `string` | `"metrics-server"` | no |
| <a name="input_node_groups"></a> [node\_groups](#input\_node\_groups) | Map of EKS managed node group definitions to create | `any` | <pre>{<br> "default": {<br> "desired_size": 2,<br> "iam_role_additional_policies": [<br> "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy"<br> ],<br> "instance_types": [<br> "t3.medium"<br> ],<br> "max_size": 4,<br> "min_size": 2<br> }<br>}</pre> | no |
| <a name="input_node_groups_default"></a> [node\_groups\_default](#input\_node\_groups\_default) | Map of EKS managed node group default configurations | `any` | <pre>{<br> "disk_size": 50,<br> "iam_role_additional_policies": [<br> "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy"<br> ],<br> "instance_types": [<br> "t3.medium"<br> ]<br>}</pre> | no |
Expand Down
2 changes: 1 addition & 1 deletion examples/spot-instance/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

| Name | Version |
|------|---------|
| <a name="provider_aws"></a> [aws](#provider\_aws) | >= 3.41 |
| <a name="provider_aws"></a> [aws](#provider\_aws) | 4.67.0 |

## Modules

Expand Down
5 changes: 3 additions & 2 deletions modules/adot/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ No modules.
| Name | Type |
|------|------|
| [aws_eks_addon.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/eks_addon) | resource |
| [aws_iam_policy.adot](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
| [aws_iam_role.adot_collector](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource |
| [aws_iam_role_policy_attachment.CloudWatchAgentServerPolicy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
| [helm_release.adot-collector](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
Expand All @@ -73,8 +74,8 @@ No modules.

| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| <a name="input_adot_collector_policy_arns"></a> [adot\_collector\_policy\_arns](#input\_adot\_collector\_policy\_arns) | List of IAM policy ARNs to attach to the ADOT collector service account. | `list(string)` | <pre>[<br> "arn:aws:iam::aws:policy/AmazonPrometheusRemoteWriteAccess",<br> "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy",<br> "arn:aws:iam::aws:policy/AWSXrayWriteOnlyAccess"<br>]</pre> | no |
| <a name="input_adot_config"></a> [adot\_config](#input\_adot\_config) | accept\_namespace\_regex defines the list of namespaces from which metrics will be exported, and additional\_metrics defines additional metrics to export. | `any` | <pre>{<br> "accept_namespace_regex": "(default|kube-system)",<br> "additional_metrics": [],<br> "helm_values": null,<br> "log_group_name": "adot_log_group"<br>}</pre> | no |
| <a name="input_adot_collector_policy_arns"></a> [adot\_collector\_policy\_arns](#input\_adot\_collector\_policy\_arns) | List of IAM policy ARNs to attach to the ADOT collector service account. | `list(string)` | `[]` | no |
| <a name="input_adot_config"></a> [adot\_config](#input\_adot\_config) | accept\_namespace\_regex defines the list of namespaces from which metrics will be exported, and additional\_metrics defines additional metrics to export. | <pre>object({<br> accept_namespace_regex = optional(string, "(default|kube-system)")<br> additional_metrics = optional(list(string), [])<br> log_group_name = optional(string, "adot")<br> log_retention = optional(number, 14)<br> helm_values = optional(any, null)<br> })</pre> | <pre>{<br> "accept_namespace_regex": "(default|kube-system)",<br> "additional_metrics": [],<br> "helm_values": null,<br> "log_group_name": "adot",<br> "log_retention": 21<br>}</pre> | no |
| <a name="input_adot_log_group_name"></a> [adot\_log\_group\_name](#input\_adot\_log\_group\_name) | ADOT log group name | `string` | `"adot_log_group_name"` | no |
| <a name="input_adot_version"></a> [adot\_version](#input\_adot\_version) | The version of the AWS Distro for OpenTelemetry addon to use. | `string` | `"v0.78.0-eksbuild.1"` | no |
| <a name="input_cluster_name"></a> [cluster\_name](#input\_cluster\_name) | K8s cluster name. | `string` | n/a | yes |
Expand Down
10 changes: 10 additions & 0 deletions modules/adot/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,14 @@ locals {

merged_metrics = concat(local.default_metrics, lookup(var.adot_config, "additional_metrics", []))
merged_namespace_specific = concat(local.default_metrics_namespace_specific, lookup(var.adot_config, "namespace_specific_metrics", []))


adot_policies = concat([
"${aws_iam_policy.adot.arn}",
"arn:aws:iam::aws:policy/AmazonPrometheusRemoteWriteAccess",
"arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy",
"arn:aws:iam::aws:policy/AWSXrayWriteOnlyAccess"
], var.adot_collector_policy_arns)


}
1 change: 1 addition & 0 deletions modules/adot/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ resource "helm_release" "adot-collector" {
cluster_name = var.cluster_name
accept_namespace_regex = var.adot_config.accept_namespace_regex
log_group_name = var.adot_config.log_group_name
log_retention = var.adot_config.log_retention
metrics = local.merged_metrics
metrics_namespace_specific = local.merged_namespace_specific
prometheus_metrics = var.prometheus_metrics
Expand Down
41 changes: 39 additions & 2 deletions modules/adot/role.tf
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,45 @@ resource "aws_iam_role" "adot_collector" {
POLICY
}

# Customer-managed IAM policy for the ADOT collector role. It grants the
# CloudWatch Logs, X-Ray and SSM actions listed below, including
# logs:PutRetentionPolicy, on all resources.
resource "aws_iam_policy" "adot" {
  # IAM policy names must be unique within an AWS account. A fixed name makes
  # the second instantiation of this module in the same account (e.g. a second
  # cluster) fail with EntityAlreadyExists, so the name is scoped to the cluster.
  name        = "${var.cluster_name}-adot-policy"
  path        = "/"
  description = "Adot Policy"

  # jsonencode renders the HCL object below as the JSON policy document.
  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = [
          "logs:PutLogEvents",
          "logs:CreateLogGroup",
          "logs:CreateLogStream",
          "logs:DescribeLogStreams",
          "logs:DescribeLogGroups",
          "logs:PutRetentionPolicy",
          "xray:PutTraceSegments",
          "xray:PutTelemetryRecords",
          "xray:GetSamplingRules",
          "xray:GetSamplingTargets",
          "xray:GetSamplingStatisticSummaries",
          "ssm:GetParameters"
        ]
        # NOTE(review): "*" lets the collector act on every log group and SSM
        # parameter in the account; consider narrowing to the ADOT log group.
        Resource = "*"
      }
    ]
  })
}

# Attaches every policy ARN in local.adot_policies to the ADOT collector role.
resource "aws_iam_role_policy_attachment" "CloudWatchAgentServerPolicy" {
  # A resource may declare either count or for_each, never both, and only one
  # policy_arn. count is kept because local.adot_policies contains
  # aws_iam_policy.adot.arn, which is unknown until apply: the list length is
  # still known at plan time, whereas for_each over a set of unknown values is
  # rejected.
  # NOTE(review): with count, inserting or removing an entry shifts the indices
  # of all later attachments and forces them to be re-created.
  count = length(local.adot_policies)

  policy_arn = local.adot_policies[count.index]
  role       = aws_iam_role.adot_collector.name

  # No explicit depends_on: local.adot_policies references
  # aws_iam_policy.adot.arn, so Terraform already orders the policy first.
}
2 changes: 2 additions & 0 deletions modules/adot/templates/adot-values.yaml.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ adotCollector:
dimension_rollup_option: NoDimensionRollup
log_group_name: "${log_group_name}"
log_stream_name: "adot-metrics-prometheus"
log_retention: "${log_retention}"
metric_declarations:
- dimensions:
- - Namespace
Expand All @@ -150,6 +151,7 @@ adotCollector:
namespace: "ContainerInsights"
log_group_name: "${log_group_name}"
log_stream_name: "adot-metrics"
log_retention: "${log_retention}"
region: "${region}"
dimension_rollup_option: "NoDimensionRollup"
resource_to_telemetry_conversion:
Expand Down
17 changes: 10 additions & 7 deletions modules/adot/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,23 @@ variable "create_namespace" {
# Additional IAM policy ARNs for the ADOT collector role.
# modules/adot/locals.tf concatenates this list after the module's built-in
# entries to form local.adot_policies, which drives the role policy attachments.
variable "adot_collector_policy_arns" {
description = "List of IAM policy ARNs to attach to the ADOT collector service account."
type = list(string)
default = [
"arn:aws:iam::aws:policy/AmazonPrometheusRemoteWriteAccess",
"arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy",
"arn:aws:iam::aws:policy/AWSXrayWriteOnlyAccess"
]
default = []
}

variable "adot_config" {
description = "accept_namespace_regex defines the list of namespaces from which metrics will be exported, and additional_metrics defines additional metrics to export."
type = any
type = object({
accept_namespace_regex = optional(string, "(default|kube-system)")
additional_metrics = optional(list(string), [])
log_group_name = optional(string, "adot")
log_retention = optional(number, 14)
helm_values = optional(any, null)
})
default = {
  accept_namespace_regex = "(default|kube-system)"
  additional_metrics     = []
  log_group_name         = "adot"
  # CloudWatch Logs accepts only a fixed set of retention periods in days
  # (1, 3, 5, 7, 14, 30, 60, 90, ...); 21 is not one of them. 14 matches the
  # optional() default in the type above and the root module's default.
  log_retention = 14
  # ADOT helm chart values.yaml, if you don't use variable adot will be deployed with module default values file
  helm_values = null
}
Expand Down
18 changes: 13 additions & 5 deletions variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -306,16 +306,24 @@ variable "vpc" {

# Selects the metrics exporter; the description documents "cloudwatch" and
# "adot" as the supported values.
# NOTE(review): there is no validation block, so any other string is accepted
# here — confirm how consumers of this variable treat unexpected values.
variable "metrics_exporter" {
type = string
default = "cloudwatch"
default = "adot"
description = "Metrics Exporter, can use cloudwatch or adot"
}

# ADOT settings for the root module. Every attribute is optional; omitted
# attributes take the defaults declared in the object type.
variable "adot_config" {
  type = object({
    accept_namespace_regex = optional(string, "(default|kube-system)")
    additional_metrics     = optional(list(string), [])
    log_group_name         = optional(string, "adot")
    log_retention          = optional(number, 14)
    helm_values            = optional(any, null)
  })
  description = "Adot configs"
  default = {
    accept_namespace_regex = "(default|kube-system)"
    additional_metrics     = []
    log_group_name         = "adot"
    log_retention          = 14
  }

  # CloudWatch Logs accepts only specific retention periods. Rejecting other
  # values at plan time avoids deploying a collector whose exporter
  # configuration is invalid. 0 is allowed because the awsemf exporter treats
  # it as "never expire". try() keeps the check from erroring when the whole
  # object is passed as null.
  validation {
    condition = contains(
      [0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1096, 1827, 2192, 2557, 2922, 3288, 3653],
      try(var.adot_config.log_retention, 14)
    )
    error_message = "The adot_config.log_retention value must be 0 (never expire) or a retention period supported by CloudWatch Logs, for example 1, 3, 5, 7, 14, 30, 60 or 90 days."
  }
}

Expand Down Expand Up @@ -360,7 +368,7 @@ variable "autoscaling" {
# Patch number of the autoscaler image, as stated in the description.
# NOTE(review): how this number is combined into the final image tag is not
# visible here — confirm against the autoscaler module before changing it.
variable "autoscaler_image_patch" {
type = number
description = "The patch number of autoscaler image"
default = 1
default = 0
}

variable "scale_down_unneeded_time" {
Expand Down

0 comments on commit 7b67965

Please sign in to comment.