From d763cf7d3dc756ef4a5fbffb6295537071100e96 Mon Sep 17 00:00:00 2001 From: Ales Verbic Date: Thu, 21 Nov 2024 11:52:55 -0500 Subject: [PATCH] fix(gke-cluster): update gke-cluster config Signed-off-by: Ales Verbic Signed-off-by: Chris Gianelloni --- common/defaults-gcp.yaml | 88 ++++++++++--- modules/common/cert-manager/stage2/main.tf | 8 +- modules/gcp/storage-classes/stage1/main.tf | 45 +++++++ stage0/gcp-terraform/.terraform.lock.hcl | 34 ++--- stage0/gcp-terraform/main.tf | 2 +- stage0/gcp-terraform/versions.tf | 2 +- stage2/.terraform.lock.hcl | 28 ++--- stage2/main.tf | 5 +- stage3/.terraform.lock.hcl | 28 ++--- stage3/kupo.tf | 44 +++++-- stage3/nodes.tf | 138 +++++++++++---------- stage3/ogmios.tf | 27 +++- stage3/utxorpc.tf | 66 ++++++++-- 13 files changed, 361 insertions(+), 154 deletions(-) diff --git a/common/defaults-gcp.yaml b/common/defaults-gcp.yaml index 029ac77..1fcafec 100644 --- a/common/defaults-gcp.yaml +++ b/common/defaults-gcp.yaml @@ -8,6 +8,35 @@ vpc_cidr: "10.6.0.0/16" dmtr_namespace: dmtr-system dmtr_context: k3d-dmtr-cluster managed_node_groups: + # GKE + - name: gke-system-az1 + labels: + gke-managed-components: "true" + instance_type: n4-standard-2 + min_size: 1 + max_size: 1 + desired_capacity: 1 + disk_size_gb: 40 + taints: + - key: components.gke.io/gke-managed-components + value: "true" + effect: NO_SCHEDULE + availability_zones: us-central1-a + + - name: gke-system-az2 + labels: + gke-managed-components: "true" + instance_type: n4-standard-2 + min_size: 1 + max_size: 1 + desired_capacity: 1 + disk_size_gb: 40 + taints: + - key: components.gke.io/gke-managed-components + value: "true" + effect: NO_SCHEDULE + availability_zones: us-central1-b + # Consistent - name: co-ad-x86-az1 labels: @@ -15,12 +44,11 @@ managed_node_groups: demeter.run/compute-profile: admin demeter.run/compute-arch: x86 demeter.run/availability-zone: az1 - instance_type: n2-standard-2 + instance_type: n4-standard-2 min_size: 1 max_size: 2 desired_capacity: 1 disk_size_gb: 40 - disk_type: pd-standard taints: - key: demeter.run/availability-sla value: "consistent" @@ -39,12 +67,11 @@ managed_node_groups: demeter.run/compute-profile: admin demeter.run/compute-arch: x86 demeter.run/availability-zone: az2 - instance_type: n2-standard-2 + instance_type: n4-standard-2 min_size: 0 max_size: 1 desired_capacity: 0 disk_size_gb: 40 - disk_type: pd-standard taints: - key: demeter.run/availability-sla value: "consistent" @@ -63,7 +90,7 @@ managed_node_groups: demeter.run/compute-profile: general-purpose demeter.run/compute-arch: x86 demeter.run/availability-zone: az1 - instance_type: n2-standard-8 + instance_type: n4-standard-8 min_size: 0 max_size: 1 desired_capacity: 0 @@ -86,7 +113,7 @@ managed_node_groups: demeter.run/compute-profile: general-purpose demeter.run/compute-arch: x86 demeter.run/availability-zone: az1 - instance_type: n2-standard-8 + instance_type: n4-standard-8 min_size: 0 max_size: 1 desired_capacity: 0 @@ -109,7 +136,7 @@ managed_node_groups: demeter.run/compute-profile: general-purpose demeter.run/compute-arch: arm64 demeter.run/availability-zone: az1 - instance_type: t2a-standard-8 + instance_type: c4a-standard-8 min_size: 0 max_size: 1 desired_capacity: 0 @@ -132,7 +159,7 @@ managed_node_groups: demeter.run/compute-profile: mem-intensive demeter.run/compute-arch: x86 demeter.run/availability-zone: az1 - instance_type: n2-highmem-8 + instance_type: n4-highmem-8 min_size: 0 max_size: 1 desired_capacity: 0 @@ -155,7 +182,7 @@ managed_node_groups: demeter.run/compute-profile: mem-intensive demeter.run/compute-arch: arm64 demeter.run/availability-zone: az1 - instance_type: t2a-standard-16 + instance_type: c4a-highmem-8 min_size: 0 max_size: 1 desired_capacity: 0 @@ -172,50 +199,75 @@ managed_node_groups: effect: NO_SCHEDULE availability_zones: us-central1-a - # Best Effort - - name: be-gp-x86-az1 + - name: co-mem-arm64-az2 + labels: + demeter.run/availability-sla: consistent + demeter.run/compute-profile: mem-intensive + demeter.run/compute-arch: arm64 + demeter.run/availability-zone: az2 + instance_type: c4a-highmem-8 + min_size: 0 + max_size: 1 + desired_capacity: 0 + disk_size_gb: 100 + taints: + - key: demeter.run/availability-sla + value: "consistent" + effect: NO_SCHEDULE + - key: demeter.run/compute-profile + value: "mem-intensive" + effect: NO_SCHEDULE + - key: demeter.run/compute-arch + value: "arm64" + effect: NO_SCHEDULE + availability_zones: us-central1-b + + # Disk Intensive + - name: be-di-x86-az1 labels: demeter.run/availability-sla: best-effort demeter.run/compute-profile: general-purpose demeter.run/compute-arch: x86 demeter.run/availability-zone: az1 - instance_type: n2-standard-8 + instance_type: n2-standard-4 min_size: 0 max_size: 1 desired_capacity: 1 disk_size_gb: 100 + disk_type: pd-ssd spot: true taints: - key: demeter.run/availability-sla - value: "best-effort" + value: "consistent" effect: NO_SCHEDULE - key: demeter.run/compute-profile - value: "general-purpose" + value: "disk-intensive" effect: NO_SCHEDULE - key: demeter.run/compute-arch value: "x86" effect: NO_SCHEDULE availability_zones: us-central1-a - - name: be-gp-arm64-az1 + - name: be-di-arm64-az1 labels: demeter.run/availability-sla: best-effort demeter.run/compute-profile: general-purpose demeter.run/compute-arch: arm64 demeter.run/availability-zone: az1 # Google supports one instance type per node group - instance_type: t2a-standard-8 + instance_type: t2a-standard-4 min_size: 0 max_size: 1 desired_capacity: 1 disk_size_gb: 100 + disk_type: pd-ssd spot: true taints: - key: demeter.run/availability-sla - value: "best-effort" + value: "consistent" effect: NO_SCHEDULE - key: demeter.run/compute-profile - value: "general-purpose" + value: "disk-intensive" effect: NO_SCHEDULE - key: demeter.run/compute-arch value: "arm64" diff --git a/modules/common/cert-manager/stage2/main.tf b/modules/common/cert-manager/stage2/main.tf index 1f8e7d8..0ca9a9e 100644 --- a/modules/common/cert-manager/stage2/main.tf +++ b/modules/common/cert-manager/stage2/main.tf @@ -63,7 +63,7 @@ resource "helm_release" "cert-manager" { set { name = "tolerations[2].value" - value = "admin" + value = "consistent" } set { @@ -113,7 +113,7 @@ resource "helm_release" "cert-manager" { set { name = "webhook.tolerations[2].value" - value = "admin" + value = "consistent" } set { @@ -163,7 +163,7 @@ resource "helm_release" "cert-manager" { set { name = "cainjector.tolerations[2].value" - value = "admin" + value = "consistent" } set { @@ -213,7 +213,7 @@ resource "helm_release" "cert-manager" { set { name = "startupapicheck.tolerations[2].value" - value = "admin" + value = "consistent" } # set { diff --git a/modules/gcp/storage-classes/stage1/main.tf b/modules/gcp/storage-classes/stage1/main.tf index f861ea6..b3a0d57 100644 --- a/modules/gcp/storage-classes/stage1/main.tf +++ b/modules/gcp/storage-classes/stage1/main.tf @@ -15,3 +15,48 @@ resource "kubernetes_storage_class" "gp" { "type" = "pd-balanced" } } + +resource "kubernetes_storage_class" "fast" { + metadata { + name = "fast" + } + + allow_volume_expansion = true + storage_provisioner = "pd.csi.storage.gke.io" + reclaim_policy = "Delete" + volume_binding_mode = "Immediate" + + parameters = { + "type" = "hyperdisk-balanced" + } +} + +resource "kubernetes_storage_class" "hyperdisk-balanced" { + metadata { + name = "hyperdisk-balanced" + } + + allow_volume_expansion = true + storage_provisioner = "pd.csi.storage.gke.io" + reclaim_policy = "Delete" + volume_binding_mode = "WaitForFirstConsumer" + + parameters = { + "type" = "hyperdisk-balanced" + } +} + +resource "kubernetes_storage_class" "hyperdisk-balanced-immediate" { + metadata { + name = "hyperdisk-balanced-immediate" + } + + allow_volume_expansion = true + storage_provisioner = "pd.csi.storage.gke.io" + reclaim_policy = "Delete" + volume_binding_mode = "Immediate" + + parameters = { + "type" = "hyperdisk-balanced" + } +} diff --git a/stage0/gcp-terraform/.terraform.lock.hcl b/stage0/gcp-terraform/.terraform.lock.hcl index a5886cb..dd5b915 100644 --- a/stage0/gcp-terraform/.terraform.lock.hcl +++ b/stage0/gcp-terraform/.terraform.lock.hcl @@ -2,29 +2,30 @@ # Manual edits may be lost in future updates. provider "registry.terraform.io/hashicorp/google" { - version = "6.10.0" - constraints = ">= 5.44.2, != 6.0.0, != 6.0.1, != 6.1.0, != 6.2.0, != 6.3.0, != 6.4.0, != 6.5.0, != 6.6.0, < 7.0.0" + version = "6.11.0" + constraints = ">= 6.11.0, 6.11.0, < 7.0.0" hashes = [ - "h1:SE6euwDB1TTcAStda9qAV4PWPjCGLosGesDZDMDoGtU=", - "zh:016ef442d70497f34d209ccba94afa5b5e8027b6a60516452549a04c5f4b1e95", - "zh:0e521ae9ab51dea6f9c310291c9e288a482bf37e149bc3e5920547d2a73a6d23", - "zh:1ad1cc5e8f7c8f0b42cc6d37c5e0a3c77557bb18d91070930d361c3d6866bf23", - "zh:64580f23f5e87d4f843a617dab9a96093671f5826c2de8bc60fb3c619f00810d", - "zh:7d29aed1a73b99e50909fe7ce2fea92ba28cd4b4943d185d9187295f991bf35f", - "zh:80ccce9ad3c64528f05b9432d6bf8278d6555ffcb1c80f563b6f24a88d269979", - "zh:af49d0083c2a46bfd022d35f7a06a0626b71d67f6b3c75b04b5723e8977d1096", - "zh:ce767ca2ed4aaf63d0fcb48f0c2756b26096cff7fd33d513ed65a4e5758371f9", - "zh:d4515ffcf5a804c4f1da750f9a4a5edaef6a4cb95e49040bb18a422eb6b4832e", + "h1:CaOB5RlXeT6Yw/ga28eLeUGpyn2hZml3LcGJenO44cc=", + "zh:0b17488376af1b1aeb23f30fe9b0bab2e9c7e99e26341da66dd320e68e1abc50", + "zh:13dd58fcd9ca6a67a0d4fb80e0b9a97fe0bd08c9eb91dfd7c4fa0ece5a7f94e1", + "zh:20b596f22c865d0a1a9847d7ae08d730521b03d7665c66b91d3fcbe8591ebe51", + "zh:22927ca9092a43ad917d40ab412cdcc1886bbfeadc02a37b642f7458c9971fb6", + "zh:3cc7ceb26c212bdc7fc9e5ae6d99d3cc31d5d46dbd740fd1b9ea240d2d145bfd", + "zh:4969257d0e2113b25d8b5fa309d71e9961fe37d4cc9948c6e717073e8c00ead7", + "zh:56ca1cd5007ed6a8c5addea049fbeb93462a56dd4b642f1543d7e0bef0b5cb31", + "zh:6782014d8b8ddfe09dea8bcdf432cdfed29126452cf4d7b4f23bcdc5da2d2e6b", + "zh:99183f3a99696edb40726860ae53f40edc8b101427dc5ded901068423a011702", + "zh:bd80b93559920fb5e36411dfbf0a56778bd9006e9d92cd3a4786c0925dfdcbfe", + "zh:be12c052b13603d49acece7e42e8907805127b553d6faebd49b84635da95e1d4", "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", - "zh:f701206edcfff3e7ee8ca159cde65264e55ec59e2d455facb57cca782a197bbc", - "zh:f984b7b6700f6c075329d43e8a0be25f20f1b124e90e1e1e13bd90a8e468743d", ] } provider "registry.terraform.io/hashicorp/kubernetes" { version = "2.33.0" - constraints = "~> 2.10" + constraints = "~> 2.10, 2.33.0" hashes = [ + "h1:HDyytvOlqNw5fJ0SB/nzgqCWniK4LAZNx23LaPavQq8=", "h1:Z2R1cnALV1BgzldRWir/TUvg10gkWSdEGsYJHFqD3bc=", "zh:255b35790b706d405e987750190658dcaefb663741b96803a9529ba5d7435329", "zh:362feba1aa820a8e02869ec71d1a08e87243dbce43671dc0995fa6c5a2fafa1d", @@ -43,9 +44,10 @@ provider "registry.terraform.io/hashicorp/kubernetes" { provider "registry.terraform.io/hashicorp/random" { version = "3.6.3" - constraints = ">= 2.1.0" + constraints = ">= 2.1.0, 3.6.3" hashes = [ "h1:Fnaec9vA8sZ8BXVlN3Xn9Jz3zghSETIKg7ch8oXhxno=", + "h1:zG9uFP8l9u+yGZZvi5Te7PV62j50azpgwPunq2vTm1E=", "zh:04ceb65210251339f07cd4611885d242cd4d0c7306e86dda9785396807c00451", "zh:448f56199f3e99ff75d5c0afacae867ee795e4dfda6cb5f8e3b2a72ec3583dd8", "zh:4b4c11ccfba7319e901df2dac836b1ae8f12185e37249e8d870ee10bb87a13fe", diff --git a/stage0/gcp-terraform/main.tf b/stage0/gcp-terraform/main.tf index 2625ead..2027e3c 100644 --- a/stage0/gcp-terraform/main.tf +++ b/stage0/gcp-terraform/main.tf @@ -92,7 +92,7 @@ module "gke" { min_count = np.min_size max_count = np.max_size disk_size_gb = np.disk_size_gb - disk_type = try(np.disk_type, "pd-ssd") + disk_type = try(np.disk_type, "hyperdisk-balanced") auto_repair = true auto_upgrade = true service_account = data.google_service_account.existing.email diff --git a/stage0/gcp-terraform/versions.tf b/stage0/gcp-terraform/versions.tf index 188d321..8bf4541 100644 --- a/stage0/gcp-terraform/versions.tf +++ b/stage0/gcp-terraform/versions.tf @@ -4,7 +4,7 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "6.10.0" + version = "6.11.0" } kubernetes = { source = "hashicorp/kubernetes" diff --git a/stage2/.terraform.lock.hcl b/stage2/.terraform.lock.hcl index 92e1ab7..03f41f8 100644 --- a/stage2/.terraform.lock.hcl +++ b/stage2/.terraform.lock.hcl @@ -2,21 +2,21 @@ # Manual edits may be lost in future updates. provider "registry.terraform.io/hashicorp/helm" { - version = "2.12.1" + version = "2.16.1" hashes = [ - "h1:sgYI7lwGqJqPopY3NGmhb1eQ0YbH8PIXaAZAmnJrAvw=", - "zh:1d623fb1662703f2feb7860e3c795d849c77640eecbc5a776784d08807b15004", - "zh:253a5bc62ba2c4314875139e3fbd2feaad5ef6b0fb420302a474ab49e8e51a38", - "zh:282358f4ad4f20d0ccaab670b8645228bfad1c03ac0d0df5889f0aea8aeac01a", - "zh:4fd06af3091a382b3f0d8f0a60880f59640d2b6d9d6a31f9a873c6f1bde1ec50", - "zh:6816976b1830f5629ae279569175e88b497abbbac30ee809948a1f923c67a80d", - "zh:7d82c4150cdbf48cfeec867be94c7b9bd7682474d4df0ebb7e24e148f964844f", - "zh:83f062049eea2513118a4c6054fb06c8600bac96196f25aed2cc21898ec86e93", - "zh:a79eec0cf4c08fca79e44033ec6e470f25ff23c3e2c7f9bc707ed7771c1072c0", - "zh:b2b2d904b2821a6e579910320605bc478bbef063579a23fbfdd6fcb5871b81f8", - "zh:e91177ca06a15487fc570cb81ecef6359aa399459ea2aa7c4f7367ba86f6fcad", - "zh:e976bcb82996fc4968f8382bbcb6673efb1f586bf92074058a232028d97825b1", + "h1:3VdXbh+m09VEAdSguT7Ea2MMnOVPZBYx4sUqvo6NPxo=", + "zh:0003f6719a32aee9afaeeb001687fc0cfc8c2d5f54861298cf1dc5711f3b4e65", + "zh:16cd5bfee09e7bb081b8b4470f31a9af508e52220fd97fd81c6dda725d9422fe", + "zh:51817de8fdc2c2e36785f23fbf4ec022111bd1cf7679498c16ad0ad7471c16db", + "zh:51b95829b2873be40a65809294bffe349e40cfccc3ff6fee0f471d01770e0ebd", + "zh:56b158dde897c47e1460181fc472c3e920aa23db40579fdc2aad333c1456d2dd", + "zh:916641d26c386959eb982e680028aa677b787687ef7c1283241e45620bc8df50", + "zh:aec15ca8605babba77b283f2ca35daca53e006d567e1c3a3daf50497035b820b", + "zh:c2cecf710b87c8f3a4d186da2ea12cf08041f97ae0c6db82649720d6ed929d65", + "zh:dbdd96f17aea25c7db2d516ab8172a5e683c6686c72a1a44173d2fe96319be39", + "zh:de11e180368434a796b1ab6f20fde7554dc74f7800e063b8e4c8ec3a86d0be63", "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + "zh:f827a9c1540d210c56053a2d5d5a6abda924896ffa8eeedc94054cf6d44c5f60", ] } @@ -24,7 +24,7 @@ provider "registry.terraform.io/hashicorp/kubernetes" { version = "2.22.0" constraints = "2.22.0" hashes = [ - "h1:b6Wj111/wsMNg8FrHFXrf4mCZFtSXKHx4JvbZh3YTCY=", + "h1:DJr88+52tPK4Ft9xltF6YL+sRz8HWLP2ZOfFiKSB5Dc=", "zh:1eac662b1f238042b2068401e510f0624efaf51fd6a4dd9c49d710a49d383b61", "zh:4c35651603493437b0b13e070148a330c034ac62c8967c2de9da6620b26adca4", "zh:50c0e8654efb46e3a3666c638ca2e0c8aec07f985fbc80f9205bed960386dc9b", diff --git a/stage2/main.tf b/stage2/main.tf index df24a10..d5b0adc 100644 --- a/stage2/main.tf +++ b/stage2/main.tf @@ -43,8 +43,9 @@ module "cert_manager" { # } module "o11y" { - source = "../modules/common/o11y/stage2" - namespace = var.dmtr_namespace + source = "../modules/common/o11y/stage2" + namespace = var.dmtr_namespace + storage_class = "hyperdisk-balanced" } module "dmtrd" { diff --git a/stage3/.terraform.lock.hcl b/stage3/.terraform.lock.hcl index fbb8e8e..282bf9a 100644 --- a/stage3/.terraform.lock.hcl +++ b/stage3/.terraform.lock.hcl @@ -2,22 +2,21 @@ # Manual edits may be lost in future updates. provider "registry.terraform.io/hashicorp/helm" { - version = "2.15.0" + version = "2.16.1" hashes = [ - "h1:VymvscRkDy0+zN2uKpKYY6njXPY8JROARuaL3VPsEos=", - "h1:WfjJptfaDzC4XCht262FFizAMX8fvRDZWtqUmuLcg88=", - "zh:18b94c7c83c30ad166722a61a412e3de6a67935772960e79aaa24c15f8ea0d0f", - "zh:4f07c929a71e8169f7471b7600bfcca36dfb295787e975e82ac0455a3ab68b47", - "zh:776b804a14c3c4ae6075b12176f81c1f1987214ee1cae4a542599389591cde11", - "zh:7c11e3adbe9bd26e88484dcdbd28c473ce3a5c58950a3e3c4f0a2caee225b845", - "zh:821e1a53415df0ae4ed523f098360d367a95d6ce3872ba841f22adfdd2f97664", - "zh:94c06e483f75a11c3f139c41b3f64b51a96d1d1485e7d1fd3c0f795e2e750945", - "zh:aa2040de0b8150ef40222a965445ec40e3df2997ffde1fb062ab4c226689115e", - "zh:ad73eebeffe20228656567963477d034b9ed3d1bd2075c1c81150def4927d810", - "zh:b77450a36807f3ad1d3ae736d1d165a94fa26f476504a280e9fb2ccb89f648d0", - "zh:d2ebd3c34c50c92106ce2df25d5598f47127dc7c60172b9e2fe56ac73dc863a8", - "zh:e565995e2614df5ddde75a743a674129288fb91669596a7b0b2580fa7ed49979", + "h1:3VdXbh+m09VEAdSguT7Ea2MMnOVPZBYx4sUqvo6NPxo=", + "zh:0003f6719a32aee9afaeeb001687fc0cfc8c2d5f54861298cf1dc5711f3b4e65", + "zh:16cd5bfee09e7bb081b8b4470f31a9af508e52220fd97fd81c6dda725d9422fe", + "zh:51817de8fdc2c2e36785f23fbf4ec022111bd1cf7679498c16ad0ad7471c16db", + "zh:51b95829b2873be40a65809294bffe349e40cfccc3ff6fee0f471d01770e0ebd", + "zh:56b158dde897c47e1460181fc472c3e920aa23db40579fdc2aad333c1456d2dd", + "zh:916641d26c386959eb982e680028aa677b787687ef7c1283241e45620bc8df50", + "zh:aec15ca8605babba77b283f2ca35daca53e006d567e1c3a3daf50497035b820b", + "zh:c2cecf710b87c8f3a4d186da2ea12cf08041f97ae0c6db82649720d6ed929d65", + "zh:dbdd96f17aea25c7db2d516ab8172a5e683c6686c72a1a44173d2fe96319be39", + "zh:de11e180368434a796b1ab6f20fde7554dc74f7800e063b8e4c8ec3a86d0be63", "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + "zh:f827a9c1540d210c56053a2d5d5a6abda924896ffa8eeedc94054cf6d44c5f60", ] } @@ -26,7 +25,6 @@ provider "registry.terraform.io/hashicorp/kubernetes" { constraints = "2.31.0" hashes = [ "h1:ZlKkkHJrjF4AiMueI2yA+abBc1c37cfwjyxURdLKhEw=", - "h1:wGHbATbv/pBVTST1MtEn0zyVhZbzZJD2NYq2EddASHY=", "zh:0d16b861edb2c021b3e9d759b8911ce4cf6d531320e5dc9457e2ea64d8c54ecd", "zh:1bad69ed535a5f32dec70561eb481c432273b81045d788eb8b37f2e4a322cc40", "zh:43c58e3912fcd5bb346b5cb89f31061508a9be3ca7dd4cd8169c066203bcdfb3", diff --git a/stage3/kupo.tf b/stage3/kupo.tf index 23f9ada..2496014 100644 --- a/stage3/kupo.tf +++ b/stage3/kupo.tf @@ -4,7 +4,7 @@ locals { # kupo_v1_networks = ["mainnet", "preprod", "preview"] kupo_v1_cluster_issuer = "letsencrypt-dns01" kupo_v1_networks = ["preview"] - kupo_v1_operator_image_tag = "7ed38ec1bd825490a7e7b9b8c130415084ea8976" + kupo_v1_operator_image_tag = "aab07d8cd8fe0fa80281550ce3845108a37f5a0b" kupo_v1_metrics_delay = 60 kupo_v1_per_min_dcus = { mainnet = "36" @@ -19,9 +19,9 @@ locals { kupo_v1_ingress_class = "kong" kupo_v1_extension_subdomain = "kupo" kupo_v1_dns_zone = "dmtr.host" - kupo_v1_proxy_green_image_tag = "e7e26f0e3e82ceabf04ceee6d536d500e14e02ab" + kupo_v1_proxy_green_image_tag = "9c0d2ed7d7758c85106d65a171f306bba7d5c64a" kupo_v1_proxy_green_replicas = "1" - kupo_v1_proxy_blue_image_tag = "e7e26f0e3e82ceabf04ceee6d536d500e14e02ab" + kupo_v1_proxy_blue_image_tag = "9c0d2ed7d7758c85106d65a171f306bba7d5c64a" kupo_v1_proxy_blue_replicas = "0" kupo_v1_proxy_resources = { limits = { @@ -42,7 +42,7 @@ module "ext_cardano_kupo_crds" { module "ext_cardano_kupo" { # source = "git::https://github.com/demeter-run/ext-cardano-kupo.git//bootstrap?ref=feat/ext-kupo-demeter-up" - source = "git::https://github.com/blinklabs-io/demeter-ext-cardano-kupo.git//bootstrap?ref=feat/ext-kupo-demeter-up" + source = "git::https://github.com/blinklabs-io/demeter-ext-cardano-kupo.git//bootstrap?ref=feat/add-dynamic-tolerations" for_each = toset([for n in toset(["v1"]) : n if var.enable_cardano_kupo]) namespace = "ftr-kupo-${each.key}" cloud_provider = var.cloud_provider @@ -66,8 +66,8 @@ module "ext_cardano_kupo" { cells = { "cell1" = { pvc = { - storage_size = "10Gi" - storage_class_name = "gp-immediate" + storage_size = "50Gi" + storage_class_name = "hyperdisk-balanced-immediate" access_mode = "ReadWriteOnce" } instances = { @@ -79,14 +79,40 @@ module "ext_cardano_kupo" { n2n_endpoint = "node-preview-stable.ext-nodes-m1.svc.cluster.local:3307" resources = { limits = { - cpu = "500m" - memory = "512Mi" + cpu = "1" + memory = "4Gi" } requests = { cpu = "250m" - memory = "256Mi" + memory = "4Gi" } } + tolerations = [ + { + effect = "NoSchedule" + key = "demeter.run/compute-profile" + operator = "Equal" + value = "mem-intensive" + }, + { + effect = "NoSchedule" + key = "demeter.run/compute-arch" + operator = "Equal" + value = "arm64" + }, + { + effect = "NoSchedule" + key = "demeter.run/availability-sla" + operator = "Equal" + value = "consistent" + }, + { + effect = "NoSchedule" + key = "kubernetes.io/arch" + operator = "Equal" + value = "arm64" + } + ] } } } diff --git a/stage3/nodes.tf b/stage3/nodes.tf index 7ac2bf5..596a095 100644 --- a/stage3/nodes.tf +++ b/stage3/nodes.tf @@ -4,8 +4,6 @@ locals { cnode_v1_default_image_tag = "10.1.2" cnode_v1_edge_base_image = "ghcr.io/demeter-run/cardano-node-edge" cnode_v1_edge_image_tag = "be248be99fa238ebb9c2f72e6042739bf02263d6" - cnode_v1_v135_base_image = "ghcr.io/blinklabs-io/cardano-node" - cnode_v1_v135_image_tag = "1.35.7-8" cnode_v1_api_key_salt = coalesce(var.cnode_v1_api_key_salt, "this is a random generated key and must be shared...") } @@ -21,7 +19,7 @@ module "ext_cardano_node_crds" { # } module "ext_cardano_node" { - source = "git::https://github.com/blinklabs-io/demeter-ext-cardano-node.git//bootstrap?ref=feat/gcp-cloud-provider" + source = "git::https://github.com/blinklabs-io/demeter-ext-cardano-node.git//bootstrap?ref=feat/list-tolerations" for_each = toset([for n in toset(["m1"]) : n if var.enable_cardano_node]) namespace = local.cnode_v1_namespace cloud_provider = var.cloud_provider @@ -29,11 +27,11 @@ module "ext_cardano_node" { extension_name = "cnode-${each.key}" operator_image_tag = "9f24ebfe1ca56351fa44ab47e5a3fdb815d0f213" proxy_blue_image_tag = "9f24ebfe1ca56351fa44ab47e5a3fdb815d0f213" - proxy_blue_replicas = 2 + proxy_blue_replicas = 1 proxy_blue_instances_namespace = local.cnode_v1_namespace proxy_blue_healthcheck_port = 31789 proxy_green_image_tag = "9f24ebfe1ca56351fa44ab47e5a3fdb815d0f213" - proxy_green_replicas = 2 + proxy_green_replicas = 1 proxy_green_instances_namespace = local.cnode_v1_namespace proxy_green_healthcheck_port = 32171 api_key_salt = local.cnode_v1_api_key_salt @@ -57,29 +55,70 @@ module "ext_cardano_node" { # topology_zone = "us-central1-a" # node_resources = { # limits = { - # "memory" = "16Gi" - # "cpu" = "8" + # "memory" = "4Gi" + # "cpu" = "4" # } # requests = { - # "memory" = "16Gi" + # "memory" = "4Gi" # "cpu" = "2" # } # } - # storage_size = "200Gi" - # storage_class_name = "gp" + # storage_size = "500Gi" + # storage_class_name = "pd-ssd" # node_version = local.cnode_v1_default_image_tag - # replicas = 2 + # replicas = 1 # restore = true + # tolerations = [ + # { + # effect = "NoSchedule" + # key = "kubernetes.io/arch" + # operator = "Equal" + # value = "amd64" + # } + # ] # } - "preview-stage-v6g" = { + "preprod-stable-v6g" = { + node_image = local.cnode_v1_default_base_image + image_tag = local.cnode_v1_default_image_tag + network = "preprod" + salt = "v6g" + release = "stable" + magic = 1 + topology_zone = "us-central1-a" + node_resources = { + limits = { + "memory" = "2Gi" + "cpu" = "8" + } + requests = { + "memory" = "2Gi" + "cpu" = "100m" + } + } + storage_size = "50Gi" + storage_class_name = "hyperdisk-balanced" + node_version = local.cnode_v1_default_image_tag + replicas = 1 + restore = true + tolerations = [ + { + effect = "NoSchedule" + key = "kubernetes.io/arch" + operator = "Equal" + value = "arm64" + } + ] + } + + "preview-stable-v6g" = { node_image = local.cnode_v1_default_base_image image_tag = local.cnode_v1_default_image_tag network = "preview" salt = "v6g" release = "stable" magic = 2 - topology_zone = "us-central1-b" + topology_zone = "us-central1-a" node_resources = { limits = { "memory" = "3Gi" @@ -90,47 +129,35 @@ module "ext_cardano_node" { "cpu" = "100m" } } - storage_class_name = "gp" + storage_size = "50Gi" + storage_class_name = "hyperdisk-balanced" node_version = local.cnode_v1_default_image_tag - replicas = 2 + replicas = 1 restore = true + tolerations = [ + { + effect = "NoSchedule" + key = "kubernetes.io/arch" + operator = "Equal" + value = "arm64" + } + ] } - - # "preview-v135-a31" = { - # node_image = local.cnode_v1_v135_base_image - # image_tag = local.cnode_v1_v135_image_tag - # network = "preview" - # salt = "a31" - # release = "v135" - # magic = 2 - # topology_zone = "us-central1-b" - # node_version = "1.35.7" - # replicas = 1 - # restore = true - # node_resources = { - # limits = { - # "memory" = "3Gi" - # "cpu" = "8" - # } - # requests = { - # "memory" = "3Gi" - # "cpu" = "100m" - # } - # } - # } } services = { - # "mainnet-stable" = { - # network = "mainnet" - # release = "stable" - # active_salt = "v6g" - # } - # "preprod-stable" = { - # network = "preprod" - # release = "stable" - # active_salt = "v6g" - # } + "mainnet-stable" = { + network = "mainnet" + release = "stable" + node_version = local.cnode_v1_default_image_tag + active_salt = "v6g" + } + "preprod-stable" = { + network = "preprod" + release = "stable" + node_version = local.cnode_v1_default_image_tag + active_salt = "v6g" + } "preview-stable" = { network = "preview" release = "stable" @@ -147,20 +174,5 @@ module "ext_cardano_node" { # release = "edge" # active_salt = "v6g" # } - # "mainnet-v135" = { - # network = "mainnet" - # release = "v135" - # active_salt = "a31" - # } - # "preview-v135" = { - # network = "preview" - # release = "v135" - # active_salt = "a31" - # } - # "preprod-v135" = { - # network = "preprod" - # release = "v135" - # active_salt = "a31" - # } } } diff --git a/stage3/ogmios.tf b/stage3/ogmios.tf index 81af86c..eb193e9 100644 --- a/stage3/ogmios.tf +++ b/stage3/ogmios.tf @@ -29,7 +29,7 @@ module "ext_cardano_ogmios_crds" { module "ext_cardano_ogmios" { # source = "git::https://github.com/demeter-run/ext-cardano-kupo.git//bootstrap" - source = "git::https://github.com/blinklabs-io/demeter-ext-cardano-ogmios.git//bootstrap?ref=feat/ext-ogmios-demeter-up" + source = "git::https://github.com/blinklabs-io/demeter-ext-cardano-ogmios.git//bootstrap?ref=feat/add-dynamic-tolerations" for_each = toset([for n in toset(["v1"]) : n if var.enable_cardano_ogmios]) namespace = local.ogmios_v1_namespace networks = local.ogmios_v1_networks @@ -65,6 +65,31 @@ module "ext_cardano_ogmios" { memory = "256Mi" } } + tolerations = [ + { + effect = "NoSchedule" + key = "demeter.run/compute-profile" + operator = "Exists" + }, + { + effect = "NoSchedule" + key = "demeter.run/compute-arch" + operator = "Equal" + value = "arm64" + }, + { + effect = "NoSchedule" + key = "demeter.run/availability-sla" + operator = "Equal" + value = "consistent" + }, + { + effect = "NoSchedule" + key = "kubernetes.io/arch" + operator = "Equal" + value = "arm64" + } + ] } } } diff --git a/stage3/utxorpc.tf b/stage3/utxorpc.tf index 28a1d4a..c20b10b 100644 --- a/stage3/utxorpc.tf +++ b/stage3/utxorpc.tf @@ -86,7 +86,7 @@ module "ext_cardano_utxorpc" { network_addresses = { # "cardano-mainnet" : "relay.utxorpc-m0.demeter.run:3000" - "cardano-preprod" : "relay.utxorpc-m0.demeter.run:3001" + # "cardano-preprod" : "relay.utxorpc-m0.demeter.run:3001" # "cardano-preview" : "relay.utxorpc-m0.demeter.run:3002" } @@ -107,20 +107,38 @@ module "ext_cardano_utxorpc" { cells = { "cell1" = { - # tolerations = [ - # { - # effect = "NoSchedule" - # key = "demeter.run/compute-arch" - # operator = "Equal" - # value = "arm64" - # } - # ] + tolerations = [ + { + effect = "NoSchedule" + key = "demeter.run/availability-sla" + operator = "Equal" + value = "consistent" + }, + { + effect = "NoSchedule" + key = "demeter.run/compute-arch" + operator = "Equal" + value = "arm64" + }, + { + effect = "NoSchedule" + key = "demeter.run/compute-profile" + operator = "Equal" + value = "disk-intensive" + }, + { + effect = "NoSchedule" + key = "kubernetes.io/arch" + operator = "Equal" + value = "arm64" + } + ] pvc = { storage_class = "gp-immediate" storage_size = "30Gi" } instances = { - "cardano-preprod" = { + "cardano-preview" = { dolos_version = "sha-1618ebb" replicas = 1 resources = { @@ -134,6 +152,34 @@ module "ext_cardano_utxorpc" { } } } + # "cardano-preprod" = { + # dolos_version = "sha-1618ebb" + # replicas = 1 + # resources = { + # limits = { + # cpu = "1000m" + # memory = "8Gi" + # } + # requests = { + # cpu = "50m" + # memory = "512Mi" + # } + # } + # } + # "cardano-mainnet" = { + # dolos_version = "sha-1618ebb" + # replicas = 1 + # resources = { + # limits = { + # cpu = "1000m" + # memory = "8Gi" + # } + # requests = { + # cpu = "50m" + # memory = "512Mi" + # } + # } + # } } } }