From 2052e522fbb18b5383e48b00699ca8d24cd1aca7 Mon Sep 17 00:00:00 2001
From: Georgi Ivanov <mollonado@gmail.com>
Date: Wed, 25 Sep 2024 16:12:13 +0100
Subject: [PATCH] Add variables for specifying node affinity and tolerations
 (#279)

* testing node affinities and tolerations

* names are singular due to: https://github.com/hashicorp/terraform-provider-kubernetes/issues/2066

* fix vars

* updated changelog and variables docs

---------

Co-authored-by: Georgi Ivanov <givanov@expediagroup.com>
---
 CHANGELOG.md     |   5 +++
 VARIABLES.md     |  25 +++++++-----
 k8s-readonly.tf  |  32 +++++++++++++++
 k8s-readwrite.tf |  32 +++++++++++++++
 variables.tf     | 104 +++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 187 insertions(+), 11 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 331e58d..0e60562 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,11 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 
+## [7.4.0] - 2024-09-25
+### Added
+- Added variables `hms_rw_tolerations` and `hms_ro_tolerations` to specify tolerations for the HMS ro and rw pods
+- Added variables `hms_rw_node_affinity` and `hms_ro_node_affinity` to specify node affinities for the HMS ro and rw pods
+
 ## [7.3.9] - 2024-09-11
 ### Fixed
 - Correct `MYSQL_DB_HOST` in readonly container.
diff --git a/VARIABLES.md b/VARIABLES.md
index 47be7aa..2c91a5c 100644
--- a/VARIABLES.md
+++ b/VARIABLES.md
@@ -55,7 +55,6 @@
 | external\_data\_buckets                                   | Buckets that are not managed by Apiary but added to Hive Metastore IAM role access.                                                                                                                                  | `list(any)`              | `[]`                                                                                                                                                                                                                                                                                           |    no    |
 | external\_database\_host                                  | External Metastore database host to support legacy installations, MySQL database won't be created by Apiary when this option is specified.                                                                           | `string`                 | `""`                                                                                                                                                                                                                                                                                           |    no    |
 | external\_database\_host\_readonly                                  | External Metastore database host to support legacy installations.                                                                           | `string`                 | `""`                                                                                                                                                                                                                                                                                           |    no    |
-
 | hive\_metastore\_port                                     | Port on which both Hive Metastore readwrite and readonly will run.                                                                                                                                                   | `number`                 | `9083`                                                                                                                                                                                                                                                                                         |    no    |
 | hms\_additional\_environment\_variables                   | Additional environment variables for the Hive Metastore.                                                                                                                                                             | `map(any)`               | `{}`                                                                                                                                                                                                                                                                                           |    no    |
 | hms\_housekeeper\_additional\_environment\_variables                   | Additional environment variables for Hive Housekeeper.                                                                                                                                                             | `map(any)`               | `{}`                                                                                                                                                                                                                                                                                           |    no    |
@@ -73,12 +72,16 @@
 | hms\_ro\_k8s\_max\_replica\_count                         | Max Number of read only Hive Metastore k8s pod replicas to create.                                                                                                                                                   | `number`                 | `"2048"`                                                                                                                                                                                                                                                                                       |    no    |
 | hms\_ro\_target\_cpu\_percentage                          | Read only Hive Metastore autoscaling threshold for CPU target usage.                                                                                                                                                 | `number`                 | `"2048"`                                                                                                                                                                                                                                                                                       |    no    |
 | hms\_ro\_request\_partition\_limit                        | Read only Hive Metastore limits of request partitions.                                                                                                                                                               | `string`                 | n/a                                                                                                                                                                                                                                                                                            |    no    |
-| hms\_rw\_request\_partition\_limit                        | Read Write Hive Metastore limits of request partitions.                                                                                                                                                              | `string`                 | n/a                                                                                                                                                                                                                                                                                            |    no    |
+| hms\_ro\_node\_affinity                        | Add node affinities to the read-only Hive Metastore pods.                                                                                                                                                    | `list(object)`                 | n/a                                                                                                                                                                                                                                                                                            |    no    |
+| hms\_ro\_tolerations                        | Add tolerations to the read-only Hive Metastore pods.                                                                                                                                                        | `list(object)`                 | n/a                                                                                                                                                                                                                                                                                            |    no    |
 | hms\_rw\_cpu                                              | CPU for the read/write Hive Metastore ECS task.<br>Valid values can be 256, 512, 1024, 2048 and 4096.<br>Reference: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html           | `string`                 | `"512"`                                                                                                                                                                                                                                                                                        |    no    |
 | hms\_rw\_db\_connection\_pool\_size                       | Read-write Hive metastore setting for size of the MySQL connection pool. Default is 10.                                                                                                                              | `number`                 | `10`                                                                                                                                                                                                                                                                                           |    no    |
 | hms\_rw\_ecs\_task\_count                                 | Desired ECS task count of the read/write Hive Metastore service.                                                                                                                                                     | `string`                 | `"3"`                                                                                                                                                                                                                                                                                          |    no    |
 | hms\_rw\_heapsize                                         | Heapsize for the read/write Hive Metastore.<br>Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html                                                                  | `string`                 | `"2048"`                                                                                                                                                                                                                                                                                       |    no    |
 | hms\_rw\_k8s\_replica\_count                              | Initial Number of read/write Hive Metastore k8s pod replicas to create.                                                                                                                                              | `number`                 | `"2048"`                                                                                                                                                                                                                                                                                       |    no    |
+| hms\_rw\_request\_partition\_limit                        | Read Write Hive Metastore limits of request partitions.                                                                                                                                                              | `string`                 | n/a                                                                                                                                                                                                                                                                                            |    no    |
+| hms\_rw\_node\_affinity                        | Add node affinities to the read/write Hive Metastore pods.                                                                                                                                                   | `list(object)`                 | n/a                                                                                                                                                                                                                                                                                            |    no    |
+| hms\_rw\_tolerations                        | Add tolerations to the read/write Hive Metastore pods.                                                                                                                                                       | `list(object)`                 | n/a                                                                                                                                                                                                                                                                                            |    no    |
 | iam\_name\_root                                           | Name to identify Hive Metastore IAM roles.                                                                                                                                                                           | `string`                 | `"hms"`                                                                                                                                                                                                                                                                                        |    no    |
 | ingress\_cidr                                             | Generally allowed ingress CIDR list.                                                                                                                                                                                 | `list(string)`           | n/a                                                                                                                                                                                                                                                                                            |   yes    |
 | instance\_name                                            | Apiary instance name to identify resources in multi-instance deployments.                                                                                                                                            | `string`                 | `""`                                                                                                                                                                                                                                                                                           |    no    |
@@ -118,16 +121,16 @@
 | vpc\_id                                                   | VPC ID.                                                                                                                                                                                                              | `string`                 | n/a                                                                                                                                                                                                                                                                                            |   yes    |
 | enable\_dashboard                                         | make EKS & ECS dashboard optional                                                                                                                                                                                    | `bool`                   | true                                                                                                                                                                                                                                                                                           |    no    |
 | rds\_family                                               | RDS Family                                                                                                                                                                                                           | `string`                 | aurora5.6                                                                                                                                                                                                                                                                                      |    no    |
-| datadog_metrics_enabled                                   | Enable Datadog metrics for HMS                                                                                                                                                                                       | `bool`                   | false                                                                                                                                                                                                                                                                                          |    no    |
-| datadog_metrics_hms_readwrite_readonly                    | Prometheus Metrics sent to datadog                                                                                                                                                                                   | list(string)             | ["metrics_classloading_loaded_value","metrics_threads_count_value","metrics_memory_heap_max_value","metrics_init_total_count_tables_value","metrics_init_total_count_dbs_value","metrics_memory_heap_used_value","metrics_init_total_count_partitions_value"]                                  |    no    |
+| datadog\_metrics\_enabled                                   | Enable Datadog metrics for HMS                                                                                                                                                                                       | `bool`                   | false                                                                                                                                                                                                                                                                                          |    no    |
+| datadog\_metrics\_hms\_readwrite\_readonly                    | Prometheus Metrics sent to datadog                                                                                                                                                                                   | list(string)             | ["metrics_classloading_loaded_value","metrics_threads_count_value","metrics_memory_heap_max_value","metrics_init_total_count_tables_value","metrics_init_total_count_dbs_value","metrics_memory_heap_used_value","metrics_init_total_count_partitions_value"]                                  |    no    |
 | datadog_metrics_port                                      | Port in which metrics will be send for Datadog                                                                                                                                                                       | string                   | 8080                                                                                                                                                                                                                                                                                           |    no    |
-| datadog_key_secret_name                                   | Name of the secret containing the DataDog API key. This needs to be created manually in AWS secrets manager. This is only applicable to ECS deployments.                                                             | string                   | null                                                                                                                                                                                                                                                                                           |    no    |
-| datadog_agent_version                                     | Version of the Datadog Agent running in the ECS cluster. This is only applicable to ECS deployments.                                                                                                                 | string                   | 7.50.3-jmx                                                                                                                                                                                                                                                                                     |    no    |
-| datadog_agent_enabled                                     | Whether to include the datadog-agent container. This is only applicable to ECS deployments.                                                                                                                          | string                   | false                                                                                                                                                                                                                                                                                          |    no    |
-| enable_tcp_keepalive                                      | tcp_keepalive settings on HMS pods. To use this you need to enable the ability to cahnge sysctl settings on your kubernetes cluster. For EKS you need to allow this on your cluster (https://kubernetes.io/docs/tasks/administer-cluster/sysctl-cluster/ check EKS version for details). If your EKS version is below 1.24 you need to create a PodSecurityPolicy allowing the following sysctls "net.ipv4.tcp_keepalive_time", "net.ipv4.tcp_keepalive_intvl","net.ipv4.tcp_keepalive_probes" and a ClusterRole + Rolebinding for the service account running the HMS pods or all services accounts in the namespace where Apiary is running so that kubernetes can apply the tcp)keepalive configuration. For EKS 1.25 and above check this https://kubernetes.io/blog/2022/08/23/kubernetes-v1-25-release/#pod-security-changes. Also see tcp_keepalive_* variables.    | bool | false                                                                                                                                                                                                                                                                                          |    no    |
-| tcp_keepalive_time                                        | Sets net.ipv4.tcp_keepalive_time (seconds).                                                                                                                                                                           | number                   | `200`                                                                                                                                                                                                                                                                                          |    no    |
-| tcp_keepalive_intvl                                       | Sets net.ipv4.tcp_keepalive_intvl (seconds)                                                                                                                                                                           | number                   | `30`                                                                                                                                                                                                                                                                                           |    no    |
-| tcp_keepalive_probes                                      | Sets net.ipv4.tcp_keepalive_probes (seconds)                                                                                                                                                                          | number                   |  `2`                                                                                                                                                                                                                                                                                           |    no    |
+| datadog\_key\_secret\_name                                   | Name of the secret containing the DataDog API key. This needs to be created manually in AWS secrets manager. This is only applicable to ECS deployments.                                                             | string                   | null                                                                                                                                                                                                                                                                                           |    no    |
+| datadog\_agent\_version                                     | Version of the Datadog Agent running in the ECS cluster. This is only applicable to ECS deployments.                                                                                                                 | string                   | 7.50.3-jmx                                                                                                                                                                                                                                                                                     |    no    |
+| datadog\_agent\_enabled                                     | Whether to include the datadog-agent container. This is only applicable to ECS deployments.                                                                                                                          | string                   | false                                                                                                                                                                                                                                                                                          |    no    |
+| enable\_tcp\_keepalive                                      | tcp_keepalive settings on HMS pods. To use this you need to enable the ability to change sysctl settings on your kubernetes cluster. For EKS you need to allow this on your cluster (https://kubernetes.io/docs/tasks/administer-cluster/sysctl-cluster/ check EKS version for details). If your EKS version is below 1.24 you need to create a PodSecurityPolicy allowing the following sysctls "net.ipv4.tcp_keepalive_time", "net.ipv4.tcp_keepalive_intvl","net.ipv4.tcp_keepalive_probes" and a ClusterRole + Rolebinding for the service account running the HMS pods or all service accounts in the namespace where Apiary is running so that kubernetes can apply the tcp_keepalive configuration. For EKS 1.25 and above check this https://kubernetes.io/blog/2022/08/23/kubernetes-v1-25-release/#pod-security-changes. Also see tcp_keepalive_* variables.    | bool | false                                                                                                                                                                                                                                                                                          |    no    |
+| tcp\_keepalive\_time                                        | Sets net.ipv4.tcp_keepalive_time (seconds).                                                                                                                                                                           | number                   | `200`                                                                                                                                                                                                                                                                                          |    no    |
+| tcp\_keepalive\_intvl                                       | Sets net.ipv4.tcp_keepalive_intvl (seconds)                                                                                                                                                                           | number                   | `30`                                                                                                                                                                                                                                                                                           |    no    |
+| tcp\_keepalive\_probes                                      | Sets net.ipv4.tcp_keepalive_probes (seconds)                                                                                                                                                                          | number                   |  `2`                                                                                                                                                                                                                                                                                           |    no    |
 
 ### apiary_assume_roles
 
diff --git a/k8s-readonly.tf b/k8s-readonly.tf
index 90ad724..da0a238 100644
--- a/k8s-readonly.tf
+++ b/k8s-readonly.tf
@@ -42,6 +42,37 @@ resource "kubernetes_deployment_v1" "apiary_hms_readonly" {
       spec {
         service_account_name            = kubernetes_service_account_v1.hms_readonly[0].metadata.0.name
         automount_service_account_token = true
+
+        dynamic "toleration" { # emits one toleration block per element of var.hms_ro_tolerations
+          for_each = var.hms_ro_tolerations
+          content {
+            effect             = lookup(toleration.value, "effect", null) # each attribute falls back to null when its key is absent from the element
+            key                = lookup(toleration.value, "key", null)
+            operator           = lookup(toleration.value, "operator", null)
+            value              = lookup(toleration.value, "value", null)
+          }
+        }
+
+        dynamic "affinity" { # emits one affinity block per element of var.hms_ro_node_affinity
+          for_each = var.hms_ro_node_affinity # NOTE(review): the provider may accept at most one affinity block per pod spec - confirm lists with more than one element are valid
+          content {
+            node_affinity {
+              required_during_scheduling_ignored_during_execution {
+                dynamic "node_selector_term" { # one term per entry of the element's "node_selector_term" list
+                  for_each = lookup(affinity.value, "node_selector_term", []) # a missing key yields no terms
+                  content {
+                    match_expressions { # each term carries exactly one match expression
+                      key      = lookup(node_selector_term.value, "key", null)
+                      operator = lookup(node_selector_term.value, "operator", null)
+                      values   = lookup(node_selector_term.value, "values", []) # empty list when the entry has no "values" key
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+
         dynamic "security_context"  {
           for_each = var.enable_tcp_keepalive ? ["enabled"] : []
           content {
@@ -59,6 +90,7 @@ resource "kubernetes_deployment_v1" "apiary_hms_readonly" {
             }
           }
         }
+
         dynamic "init_container" {
           for_each = var.external_database_host == "" ? ["enabled"] : []
 
diff --git a/k8s-readwrite.tf b/k8s-readwrite.tf
index 3cb965c..aee28f6 100644
--- a/k8s-readwrite.tf
+++ b/k8s-readwrite.tf
@@ -42,6 +42,37 @@ resource "kubernetes_deployment_v1" "apiary_hms_readwrite" {
       spec {
         service_account_name            = kubernetes_service_account_v1.hms_readwrite[0].metadata.0.name
         automount_service_account_token = true
+
+        dynamic "toleration" {
+          for_each = var.hms_rw_tolerations
+          content {
+            effect             = lookup(toleration.value, "effect", null)
+            key                = lookup(toleration.value, "key", null)
+            operator           = lookup(toleration.value, "operator", null)
+            value              = lookup(toleration.value, "value", null)
+          }
+        }
+
+        dynamic "affinity" {
+          for_each = var.hms_rw_node_affinity
+          content {
+            node_affinity {
+              required_during_scheduling_ignored_during_execution {
+                dynamic "node_selector_term" {
+                  for_each = lookup(affinity.value, "node_selector_term", [])
+                  content {
+                    match_expressions {
+                      key      = lookup(node_selector_term.value, "key", null)
+                      operator = lookup(node_selector_term.value, "operator", null)
+                      values   = lookup(node_selector_term.value, "values", [])
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+
         dynamic "security_context"  {
           for_each = var.enable_tcp_keepalive ? ["enabled"] : []
           content {
@@ -59,6 +90,7 @@ resource "kubernetes_deployment_v1" "apiary_hms_readwrite" {
             }
           }
         }
+
         dynamic "init_container" {
           for_each = var.external_database_host == "" ? ["enabled"] : []
           content {
diff --git a/variables.tf b/variables.tf
index 4c7f89f..20c3fe9 100644
--- a/variables.tf
+++ b/variables.tf
@@ -394,6 +394,110 @@ variable "hms_ro_k8s_max_replica_count" {
   default     = 10
 }
 
+variable "hms_rw_node_affinity" {
+  description = <<EOF
+Adds a list of node affinities for the HMS readwrite pods. For example if you
+have a pool of workers with the following label "pool=metastore" you
+can add an affinity to these workers like this:
+
+hms_rw_node_affinity = [
+  {
+    node_selector_term = [
+      {
+        key      = "pool"
+        operator = "In"
+        values   = ["metastore"]
+      }
+    ]
+  }
+]
+EOF  
+  type = list(object({
+    node_selector_term = list(object({
+      key      = string
+      operator = string
+      values   = list(string)
+    }))
+  }))
+  default = []  # Default to an empty list
+}
+
+variable "hms_rw_tolerations" {
+  description = <<EOF
+Adds a list of tolerations for the HMS readwrite pods. For example if you
+have a pool of workers with the following taints "pool=metastore:NoSchedule" you
+can add a toleration like this:
+
+hms_rw_tolerations = [
+  {
+    key      = "pool"
+    operator = "Equal"
+    value    = "metastore"
+    effect   = "NoSchedule"
+  }
+]
+EOF
+  type = list(object({
+    effect             = string
+    key                = string
+    operator           = string
+    value              = string
+  }))
+  default = []
+}
+
+variable "hms_ro_node_affinity" {
+  description = <<EOF
+Adds a list of node affinities for the HMS readonly pods. For example if you
+have a pool of workers with the following label "pool=metastore" you
+can add an affinity to these workers like this:
+
+hms_ro_node_affinity = [
+  {
+    node_selector_term = [
+      {
+        key      = "pool"
+        operator = "In"
+        values   = ["metastore"]
+      }
+    ]
+  }
+]
+EOF
+  type = list(object({
+    node_selector_term = list(object({
+      key      = string
+      operator = string
+      values   = list(string)
+    }))
+  }))
+  default = []
+}
+
+variable "hms_ro_tolerations" {
+  description = <<EOF
+Adds a list of tolerations for the HMS readonly pods. For example if you
+have a pool of workers with the following taints "pool=metastore:NoSchedule" you
+can add a toleration like this:
+
+hms_ro_tolerations = [
+  {
+    key      = "pool"
+    operator = "Equal"
+    value    = "metastore"
+    effect   = "NoSchedule"
+  }
+]
+EOF  
+  type = list(object({
+    effect             = string
+    key                = string
+    operator           = string
+    value              = string
+  }))
+  default = []
+}
+
 variable "enable_autoscaling" {
   description = "Enable read only Hive Metastore k8s horizontal pod autoscaling"
   type        = bool