From fe562f1dfa5e1cc54c6a809a573eebe3b7278177 Mon Sep 17 00:00:00 2001 From: "Johannes M. Scheuermann" Date: Tue, 30 May 2023 13:42:43 +0200 Subject: [PATCH] Initial support for three data hall replication --- .../foundationdb_database_configuration.go | 34 ++++- ...oundationdb_database_configuration_test.go | 36 ++++++ api/v1beta2/foundationdb_labels.go | 12 ++ api/v1beta2/foundationdbcluster_types.go | 7 +- ...foundationdb.org_foundationdbclusters.yaml | 2 + config/tests/three_data_hall/Readme.md | 21 +++ config/tests/three_data_hall/create.bash | 34 +++++ config/tests/three_data_hall/delete.bash | 5 + config/tests/three_data_hall/final.yaml | 40 ++++++ config/tests/three_data_hall/functions.bash | 29 +++++ config/tests/three_data_hall/stage_1.yaml | 40 ++++++ controllers/change_coordinators.go | 2 +- controllers/change_coordinators_test.go | 122 ++++++++++++++++-- docs/manual/fault_domains.md | 59 +++++++++ internal/locality/locality.go | 24 +++- internal/locality/locality_test.go | 76 ++++++++++- internal/monitor_conf.go | 33 ++++- 17 files changed, 543 insertions(+), 33 deletions(-) create mode 100644 config/tests/three_data_hall/Readme.md create mode 100755 config/tests/three_data_hall/create.bash create mode 100755 config/tests/three_data_hall/delete.bash create mode 100644 config/tests/three_data_hall/final.yaml create mode 100644 config/tests/three_data_hall/functions.bash create mode 100644 config/tests/three_data_hall/stage_1.yaml diff --git a/api/v1beta2/foundationdb_database_configuration.go b/api/v1beta2/foundationdb_database_configuration.go index fbf947083..103edc71c 100644 --- a/api/v1beta2/foundationdb_database_configuration.go +++ b/api/v1beta2/foundationdb_database_configuration.go @@ -33,7 +33,7 @@ import ( type DatabaseConfiguration struct { // RedundancyMode defines the core replication factor for the database. // +kubebuilder:validation:Optional - // +kubebuilder:validation:Enum=single;double;triple + // +kubebuilder:validation:Enum=single;double;triple;three_data_hall // +kubebuilder:default:double RedundancyMode RedundancyMode `json:"redundancy_mode,omitempty"` @@ -211,6 +211,19 @@ func (configuration DatabaseConfiguration) NormalizeConfiguration() DatabaseConf return *result } +// CountUniqueDataCenters returns the number of unique data centers based on the desired DatabaseConfiguration. 
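+// For example, a configuration whose first region contains dc1 and dc3 and whose second
+// region contains dc2 and dc3 counts three unique data centers, since dc3 is shared between the regions.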
+func (configuration *DatabaseConfiguration) CountUniqueDataCenters() int { + uniqueDataCenters := map[string]None{} + + for _, region := range configuration.Regions { + for _, dc := range region.DataCenters { + uniqueDataCenters[dc.ID] = None{} + } + } + + return len(uniqueDataCenters) +} + // NormalizeConfigurationWithSeparatedProxies ensures a standardized // format and defaults when comparing database configuration in the // cluster spec with database configuration in the cluster status, @@ -284,7 +297,7 @@ func (configuration *DatabaseConfiguration) GetRoleCountsWithDefaults(version Ve counts.Storage = 2*faultTolerance + 1 } if counts.Logs == 0 { - counts.Logs = 3 + counts.Logs = configuration.RedundancyMode.getDefaultLogCount() } if version.HasSeparatedProxies() { @@ -597,9 +610,11 @@ func (configuration DatabaseConfiguration) FillInDefaultsFromStatus() DatabaseCo if result.RemoteLogs == 0 { result.RemoteLogs = -1 } + if result.LogRouters == 0 { result.LogRouters = -1 } + return *result } @@ -678,7 +693,7 @@ func DesiredFaultTolerance(redundancyMode RedundancyMode) int { return 0 case RedundancyModeDouble, RedundancyModeUnset: return 1 - case RedundancyModeTriple: + case RedundancyModeTriple, RedundancyModeThreeDataHall: return 2 default: return 0 @@ -694,6 +709,8 @@ func MinimumFaultDomains(redundancyMode RedundancyMode) int { return 2 case RedundancyModeTriple: return 3 + case RedundancyModeThreeDataHall: + return 4 default: return 1 } @@ -710,6 +727,8 @@ const ( RedundancyModeDouble RedundancyMode = "double" // RedundancyModeTriple defines the replication factor 3. RedundancyModeTriple RedundancyMode = "triple" + // RedundancyModeThreeDataHall defines the replication factor three_data_hall. + RedundancyModeThreeDataHall RedundancyMode = "three_data_hall" // RedundancyModeOneSatelliteSingle defines the replication factor one_satellite_single. RedundancyModeOneSatelliteSingle RedundancyMode = "one_satellite_single" // RedundancyModeOneSatelliteDouble defines the replication factor one_satellite_double. 
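As a quick reference (not part of the patch): the helpers touched above give `three_data_hall` a fault tolerance of 2 and a minimum of 4 fault domains. A minimal sketch, assuming the operator's usual module path:

```go
package main

import (
	"fmt"

	fdbv1beta2 "github.com/FoundationDB/fdb-kubernetes-operator/api/v1beta2"
)

func main() {
	// Illustrative only: these values mirror the switch statements added in this patch.
	mode := fdbv1beta2.RedundancyModeThreeDataHall

	// three_data_hall keeps three copies of the data, so it tolerates two failures.
	fmt.Println(fdbv1beta2.DesiredFaultTolerance(mode)) // 2

	// MinimumFaultDomains reports that this mode needs at least four fault domains.
	fmt.Println(fdbv1beta2.MinimumFaultDomains(mode)) // 4
}
```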
@@ -718,6 +737,15 @@ const ( RedundancyModeUnset RedundancyMode = "" ) +// getDefaultLogCount returns the default log count for the provided redundancy mode +func (redundancyMode RedundancyMode) getDefaultLogCount() int { + if redundancyMode == RedundancyModeThreeDataHall { + return 4 + } + + return 3 +} + // StorageEngine defines the storage engine for the database // +kubebuilder:validation:MaxLength=100 type StorageEngine string diff --git a/api/v1beta2/foundationdb_database_configuration_test.go b/api/v1beta2/foundationdb_database_configuration_test.go index ec63884bf..4e875837d 100644 --- a/api/v1beta2/foundationdb_database_configuration_test.go +++ b/api/v1beta2/foundationdb_database_configuration_test.go @@ -158,6 +158,42 @@ var _ = Describe("DatabaseConfiguration", func() { }) }) + When("a three_data_hall cluster with the default values is provided", func() { + var cluster *FoundationDBCluster + + BeforeEach(func() { + cluster = &FoundationDBCluster{ + Spec: FoundationDBClusterSpec{ + Version: "7.1.33", + DataHall: "az1", + ProcessCounts: ProcessCounts{ + Stateless: -1, + }, + DatabaseConfiguration: DatabaseConfiguration{ + StorageEngine: StorageEngineSSD, + RedundancyMode: RedundancyModeThreeDataHall, + UsableRegions: 1, + }, + }, + } + }) + + When("getting the default process counts", func() { + var err error + var counts ProcessCounts + + BeforeEach(func() { + counts, err = cluster.GetProcessCountsWithDefaults() + }) + + It("It should calculate the default process counts", func() { + Expect(err).NotTo(HaveOccurred()) + Expect(counts.Log).To(BeNumerically("==", 6)) // 4 required + 2 additional + Expect(counts.Storage).To(BeNumerically("==", 5)) + }) + }) + }) + When("using ProcessCounts", func() { When("calculating the total number of processes", func() { var counts ProcessCounts diff --git a/api/v1beta2/foundationdb_labels.go b/api/v1beta2/foundationdb_labels.go index d3fd276c8..e2812beed 100644 --- a/api/v1beta2/foundationdb_labels.go +++ b/api/v1beta2/foundationdb_labels.go @@ -65,6 +65,10 @@ const ( // the zone ID. FDBLocalityZoneIDKey = "zoneid" + // FDBLocalityMachineIDKey represents the key in the locality map that holds + // the machine ID. + FDBLocalityMachineIDKey = "machineid" + // FDBLocalityDCIDKey represents the key in the locality map that holds // the DC ID. FDBLocalityDCIDKey = "dcid" @@ -76,4 +80,12 @@ const ( // FDBLocalityProcessIDKey represents the key in the locality map that // holds the process ID. FDBLocalityProcessIDKey = "process_id" + + // FDBLocalityDataHallKey represents the key in the locality map that holds + // the data hall. + FDBLocalityDataHallKey = "data_hall" + + // FDBLocalityDCIDlKey represents the key in the locality map that holds + // the data center ID. + FDBLocalityDCIDlKey = "dcid" ) diff --git a/api/v1beta2/foundationdbcluster_types.go b/api/v1beta2/foundationdbcluster_types.go index 9ab386d5f..0ae648d63 100644 --- a/api/v1beta2/foundationdbcluster_types.go +++ b/api/v1beta2/foundationdbcluster_types.go @@ -1127,7 +1127,7 @@ func (cluster *FoundationDBCluster) GetProcessSettings(processClass ProcessClass // The default Storage value will be 2F + 1, where F is the cluster's fault // tolerance. // -// The default Logs value will be 3. +// The default Logs value will be 3 or 4 for three_data_hall. // // The default Proxies value will be 3. 
// @@ -1289,10 +1289,9 @@ func (cluster *FoundationDBCluster) MinimumFaultDomains() int { return MinimumFaultDomains(cluster.Spec.DatabaseConfiguration.RedundancyMode) } -// DesiredCoordinatorCount returns the number of coordinators to recruit for -// a cluster. +// DesiredCoordinatorCount returns the number of coordinators to recruit for a cluster. func (cluster *FoundationDBCluster) DesiredCoordinatorCount() int { - if cluster.Spec.DatabaseConfiguration.UsableRegions > 1 { + if cluster.Spec.DatabaseConfiguration.UsableRegions > 1 || cluster.Spec.DatabaseConfiguration.RedundancyMode == RedundancyModeThreeDataHall { return 9 } diff --git a/config/crd/bases/apps.foundationdb.org_foundationdbclusters.yaml b/config/crd/bases/apps.foundationdb.org_foundationdbclusters.yaml index 5a1115089..f25c79d8a 100644 --- a/config/crd/bases/apps.foundationdb.org_foundationdbclusters.yaml +++ b/config/crd/bases/apps.foundationdb.org_foundationdbclusters.yaml @@ -10079,6 +10079,7 @@ spec: - single - double - triple + - three_data_hall maxLength: 100 type: string regions: @@ -13704,6 +13705,7 @@ spec: - single - double - triple + - three_data_hall maxLength: 100 type: string regions: diff --git a/config/tests/three_data_hall/Readme.md b/config/tests/three_data_hall/Readme.md new file mode 100644 index 000000000..8f3a14a33 --- /dev/null +++ b/config/tests/three_data_hall/Readme.md @@ -0,0 +1,21 @@ +# Three-Data-hall example + +This example requires that your Kubernetes cluster has nodes which are labeled with `topology.kubernetes.io/zone`. +The example requires at least 3 unique zones, those can be faked for testing, by adding the labels to a node. +If you want to use cloud provider specific zone label values you can set the `AZ1`, `AZ2` and `AZ3` environment variables. + +## Create the Three-Data-Hall cluster + +This will bring up a FDB cluster using the three-data-hall redundancy mode. + +```bash +./create.bash +``` + +## Delete + +This will remove all created resources: + +```bash +./delete.bash +``` diff --git a/config/tests/three_data_hall/create.bash b/config/tests/three_data_hall/create.bash new file mode 100755 index 000000000..e35f54b57 --- /dev/null +++ b/config/tests/three_data_hall/create.bash @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +set -eu + +# This directory provides an example of creating a cluster using the three_data_hall +# replication topology. +# +# This example is built for local testing, so it will create all of the Pods +# within a single Kubernetes cluster, but will give false locality information for the zones +# to make the processes believe they are in different locations. +# +# You can use this script to bootstrap the cluster. Once it finishes, you can +# make changes to the cluster by editing the final.yaml file and running the +# apply.bash script. You can clean up the clusters by running the delete.bash +# script. +DIR="${BASH_SOURCE%/*}" + +. 
$DIR/functions.bash + +AZ1=${AZ1:-"az1"} +AZ2=${AZ2:-"az2"} +AZ3=${AZ3:-"az3"} + +applyFile "${DIR}/stage_1.yaml" "${AZ1}" '""' +checkReconciliationLoop test-cluster-${AZ1} +connectionString=$(getConnectionString test-cluster-${AZ1}) + +applyFile "${DIR}/final.yaml" "${AZ1}" "${connectionString}" +applyFile "${DIR}/final.yaml" ${AZ2} "${connectionString}" +applyFile "${DIR}/final.yaml" ${AZ3} "${connectionString}" + +checkReconciliationLoop test-cluster-${AZ1} +checkReconciliationLoop test-cluster-${AZ2} +checkReconciliationLoop test-cluster-${AZ3} diff --git a/config/tests/three_data_hall/delete.bash b/config/tests/three_data_hall/delete.bash new file mode 100755 index 000000000..387266c1b --- /dev/null +++ b/config/tests/three_data_hall/delete.bash @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +set -eu + +kubectl delete fdb -l cluster-group=test-cluster diff --git a/config/tests/three_data_hall/final.yaml b/config/tests/three_data_hall/final.yaml new file mode 100644 index 000000000..96df4add6 --- /dev/null +++ b/config/tests/three_data_hall/final.yaml @@ -0,0 +1,40 @@ +# This file provides an example of a cluster you can run in a local testing +# environment to create a simulation of a three_data_hall cluster. +# +# This requires variables to be interpolated for $az and $connectionString +apiVersion: apps.foundationdb.org/v1beta2 +kind: FoundationDBCluster +metadata: + labels: + cluster-group: test-cluster + name: test-cluster-$az +spec: + version: 7.1.26 + faultDomain: + key: foundationdb.org/none + processGroupIDPrefix: $az + dataHall: $az + processCounts: + stateless: -1 + seedConnectionString: $connectionString + databaseConfiguration: + redundancy_mode: "three_data_hall" + processes: + general: + customParameters: + - "knob_disable_posix_kernel_aio=1" + volumeClaimTemplate: + spec: + resources: + requests: + storage: "16G" + podTemplate: + spec: + nodeSelector: + "topology.kubernetes.io/zone": "$az" + containers: + - name: foundationdb + resources: + requests: + cpu: 250m + memory: 128Mi diff --git a/config/tests/three_data_hall/functions.bash b/config/tests/three_data_hall/functions.bash new file mode 100644 index 000000000..a2fc36269 --- /dev/null +++ b/config/tests/three_data_hall/functions.bash @@ -0,0 +1,29 @@ +function applyFile() { + az=${2} + + az="${az}" connectionString="${3}" envsubst < "${1}"| kubectl apply -f - +} + +function checkReconciliation() { + clusterName=$1 + + generationsOutput=$(kubectl get fdb "${clusterName}" -o jsonpath='{.metadata.generation} {.status.generations.reconciled}') + read -ra generations <<< "${generationsOutput}" + if [[ ("${#generations[@]}" -ge 2) && ("${generations[0]}" == "${generations[1]}") ]]; then + return 1 + else + echo "Latest generations for $clusterName: $generationsOutput" + return 0 + fi +} + +function getConnectionString() { + kubectl get fdb "${1}" -o jsonpath='{.status.connectionString}' +} + +function checkReconciliationLoop() { + while checkReconciliation "${1}" ; do + echo "Waiting for reconciliation" + sleep 5 + done +} diff --git a/config/tests/three_data_hall/stage_1.yaml b/config/tests/three_data_hall/stage_1.yaml new file mode 100644 index 000000000..5e272ff13 --- /dev/null +++ b/config/tests/three_data_hall/stage_1.yaml @@ -0,0 +1,40 @@ +# This file provides an example of a cluster you can run in a local testing +# environment to create a simulation of a three_data_hall cluster. 
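+#
+# Note: this stage intentionally uses the "triple" redundancy mode. create.bash switches
+# the cluster to "three_data_hall" by applying final.yaml for all three availability
+# zones once this initial cluster has reconciled.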
+# +# This requires variables to be interpolated for $az and $connectionString +apiVersion: apps.foundationdb.org/v1beta2 +kind: FoundationDBCluster +metadata: + labels: + cluster-group: test-cluster + name: test-cluster-$az +spec: + version: 7.1.26 + faultDomain: + key: foundationdb.org/none + processGroupIDPrefix: $az + dataHall: $az + processCounts: + stateless: -1 + seedConnectionString: $connectionString + databaseConfiguration: + redundancy_mode: "triple" + processes: + general: + customParameters: + - "knob_disable_posix_kernel_aio=1" + volumeClaimTemplate: + spec: + resources: + requests: + storage: "16G" + podTemplate: + spec: + nodeSelector: + "topology.kubernetes.io/zone": "$az" + containers: + - name: foundationdb + resources: + requests: + cpu: 250m + memory: 128Mi diff --git a/controllers/change_coordinators.go b/controllers/change_coordinators.go index cb3b9a9f1..4d72834a6 100644 --- a/controllers/change_coordinators.go +++ b/controllers/change_coordinators.go @@ -159,7 +159,7 @@ func selectCoordinators(logger logr.Logger, cluster *fdbv1beta2.FoundationDBClus HardLimits: locality.GetHardLimits(cluster), }) - logger.Info("Current coordinators", "coordinators", coordinators) + logger.Info("Current coordinators", "coordinators", coordinators, "coordinatorCount", coordinatorCount) if err != nil { return candidates, err } diff --git a/controllers/change_coordinators_test.go b/controllers/change_coordinators_test.go index 94bf3383d..79ba6d699 100644 --- a/controllers/change_coordinators_test.go +++ b/controllers/change_coordinators_test.go @@ -87,7 +87,7 @@ var _ = Describe("Change coordinators", func() { // Only select Storage processes since we select 3 processes and we have 4 storage processes for _, candidate := range candidates { - Expect(strings.HasPrefix(candidate.ID, "storage")).To(BeTrue()) + Expect(candidate.ID).To(HavePrefix("storage")) } }) }) @@ -109,7 +109,7 @@ var _ = Describe("Change coordinators", func() { // Only select Storage processes since we select 3 processes and we have 4 storage processes for _, candidate := range candidates { Expect(candidate.ID).NotTo(Equal(removedProcess)) - Expect(strings.HasPrefix(candidate.ID, "storage")).To(BeTrue()) + Expect(candidate.ID).To(HavePrefix("storage")) } }) }) @@ -127,7 +127,7 @@ var _ = Describe("Change coordinators", func() { // Only select Storage processes since we select 3 processes and we have 4 storage processes for _, candidate := range candidates { Expect(candidate.ID).NotTo(Equal("storage-2")) - Expect(strings.HasPrefix(candidate.ID, "storage")).To(BeTrue()) + Expect(candidate.ID).To(HavePrefix("storage")) } }) }) @@ -232,7 +232,7 @@ var _ = Describe("Change coordinators", func() { Expect(err).NotTo(HaveOccurred()) // generate status for 2 dcs and 1 sate - status.Cluster.Processes = generateProcessInfo(dcCnt, satCnt, excludes) + status.Cluster.Processes = generateProcessInfoForMultiRegion(dcCnt, satCnt, excludes) candidates, err = selectCoordinators(logr.Discard(), cluster, status) if shouldFail { @@ -612,6 +612,74 @@ var _ = Describe("Change coordinators", func() { }) }) }) + + When("using a FDB cluster with three_data_hall", func() { + var status *fdbv1beta2.FoundationDBStatus + var candidates []locality.Info + + JustBeforeEach(func() { + cluster.Spec.DataHall = "az1" + cluster.Spec.DatabaseConfiguration.RedundancyMode = fdbv1beta2.RedundancyModeThreeDataHall + + var err error + status, err = adminClient.GetStatus() + Expect(err).NotTo(HaveOccurred()) + + status.Cluster.Processes = 
generateProcessInfoForThreeDataHall(3, nil) + + candidates, err = selectCoordinators(logr.Discard(), cluster, status) + Expect(err).NotTo(HaveOccurred()) + }) + + When("all processes are healthy", func() { + It("should only select storage processes", func() { + Expect(cluster.DesiredCoordinatorCount()).To(BeNumerically("==", 9)) + Expect(len(candidates)).To(BeNumerically("==", cluster.DesiredCoordinatorCount())) + + dataHallCount := map[string]int{} + for _, candidate := range candidates { + Expect(candidate.ID).To(ContainSubstring("storage")) + dataHallCount[strings.Split(candidate.ID, "-")[0]]++ + } + + Expect(dataHallCount).To(Equal(map[string]int{ + "datahall0": 3, + "datahall1": 3, + "datahall2": 3, + })) + }) + }) + + When("when one storage process is marked for removal", func() { + removedProcess := fdbv1beta2.ProcessGroupID("storage-2") + + BeforeEach(func() { + cluster.Spec.ProcessGroupsToRemove = []fdbv1beta2.ProcessGroupID{ + removedProcess, + } + Expect(cluster.ProcessGroupIsBeingRemoved(removedProcess)).To(BeTrue()) + }) + + It("should only select storage processes and exclude the removed process", func() { + Expect(cluster.DesiredCoordinatorCount()).To(BeNumerically("==", 9)) + Expect(len(candidates)).To(BeNumerically("==", cluster.DesiredCoordinatorCount())) + + // Only select Storage processes since we select 3 processes and we have 4 storage processes + dataHallCount := map[string]int{} + for _, candidate := range candidates { + Expect(candidate.ID).NotTo(Equal(removedProcess)) + Expect(candidate.ID).To(ContainSubstring("storage")) + dataHallCount[strings.Split(candidate.ID, "-")[0]]++ + } + + Expect(dataHallCount).To(Equal(map[string]int{ + "datahall0": 3, + "datahall1": 3, + "datahall2": 3, + })) + }) + }) + }) }) Describe("reconcile", func() { @@ -706,30 +774,51 @@ var _ = Describe("Change coordinators", func() { }) }) -func generateProcessInfo(dcCount int, satCount int, excludes []string) map[fdbv1beta2.ProcessGroupID]fdbv1beta2.FoundationDBStatusProcessInfo { +func generateProcessInfoForMultiRegion(dcCount int, satCount int, excludes []string) map[fdbv1beta2.ProcessGroupID]fdbv1beta2.FoundationDBStatusProcessInfo { res := map[fdbv1beta2.ProcessGroupID]fdbv1beta2.FoundationDBStatusProcessInfo{} logCnt := 4 for i := 0; i < dcCount; i++ { dcid := fmt.Sprintf("dc%d", i) - generateProcessInfoDetails(res, dcid, 8, excludes, fdbv1beta2.ProcessClassStorage) - generateProcessInfoDetails(res, dcid, logCnt, excludes, fdbv1beta2.ProcessClassLog) + generateProcessInfoDetails(res, dcid, "", 8, excludes, fdbv1beta2.ProcessClassStorage) + generateProcessInfoDetails(res, dcid, "", logCnt, excludes, fdbv1beta2.ProcessClassLog) } for i := 0; i < satCount; i++ { dcid := fmt.Sprintf("sat%d", i) - generateProcessInfoDetails(res, dcid, logCnt, excludes, fdbv1beta2.ProcessClassLog) + generateProcessInfoDetails(res, dcid, "", logCnt, excludes, fdbv1beta2.ProcessClassLog) } return res } -func generateProcessInfoDetails(res map[fdbv1beta2.ProcessGroupID]fdbv1beta2.FoundationDBStatusProcessInfo, dcID string, cnt int, excludes []string, pClass fdbv1beta2.ProcessClass) { +func generateProcessInfoForThreeDataHall(dataHallCount int, excludes []string) map[fdbv1beta2.ProcessGroupID]fdbv1beta2.FoundationDBStatusProcessInfo { + res := map[fdbv1beta2.ProcessGroupID]fdbv1beta2.FoundationDBStatusProcessInfo{} + logCnt := 4 + + for i := 0; i < dataHallCount; i++ { + dataHallID := fmt.Sprintf("datahall%d", i) + generateProcessInfoDetails(res, "", dataHallID, 8, excludes, fdbv1beta2.ProcessClassStorage) + 
generateProcessInfoDetails(res, "", dataHallID, logCnt, excludes, fdbv1beta2.ProcessClassLog) + } + + return res +} + +func generateProcessInfoDetails(res map[fdbv1beta2.ProcessGroupID]fdbv1beta2.FoundationDBStatusProcessInfo, dcID string, dataHall string, cnt int, excludes []string, pClass fdbv1beta2.ProcessClass) { for idx := 0; idx < cnt; idx++ { excluded := false - zoneID := fmt.Sprintf("%s-%s-%d", dcID, pClass, idx) + var zoneID string + + if dcID != "" { + zoneID = fmt.Sprintf("%s-%s-%d", dcID, pClass, idx) + } + + if dataHall != "" { + zoneID = fmt.Sprintf("%s-%s-%d", dataHall, pClass, idx) + } for _, exclude := range excludes { if exclude != zoneID { @@ -741,12 +830,11 @@ func generateProcessInfoDetails(res map[fdbv1beta2.ProcessGroupID]fdbv1beta2.Fou } addr := fmt.Sprintf("1.1.1.%d:4501", len(res)) - res[fdbv1beta2.ProcessGroupID(zoneID)] = fdbv1beta2.FoundationDBStatusProcessInfo{ + processInfo := fdbv1beta2.FoundationDBStatusProcessInfo{ ProcessClass: pClass, Locality: map[string]string{ fdbv1beta2.FDBLocalityInstanceIDKey: zoneID, fdbv1beta2.FDBLocalityZoneIDKey: zoneID, - fdbv1beta2.FDBLocalityDCIDKey: dcID, }, Excluded: excluded, Address: fdbv1beta2.ProcessAddress{ @@ -755,5 +843,15 @@ func generateProcessInfoDetails(res map[fdbv1beta2.ProcessGroupID]fdbv1beta2.Fou }, CommandLine: fmt.Sprintf("/fdbserver --public_address=%s", addr), } + + if dcID != "" { + processInfo.Locality[fdbv1beta2.FDBLocalityDCIDKey] = dcID + } + + if dataHall != "" { + processInfo.Locality[fdbv1beta2.FDBLocalityDataHallKey] = dataHall + } + + res[fdbv1beta2.ProcessGroupID(zoneID)] = processInfo } } diff --git a/docs/manual/fault_domains.md b/docs/manual/fault_domains.md index b2843b74d..62f0029bd 100644 --- a/docs/manual/fault_domains.md +++ b/docs/manual/fault_domains.md @@ -98,6 +98,65 @@ spec: This strategy uses the pod name as the fault domain, which allows each process to act as a separate failure domain. Any hardware failure could lead to a complete loss of the cluster. This configuration should not be used in any production environment. +## Three-Data-Hall Replication + +The [three-data-hall](https://apple.github.io/foundationdb/configuration.html#single-datacenter-modes) replication can be use to replicate data across three data halls, or availability zones. +This requires that your fault domains are properly labeled on the Kubernetes nodes. +Most cloud-providers will use the well-known label [topology.kubernetes.io/zone](https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesiozone) for this. +When creating a three-data-hall replicated FoundationDBCluster on Kubernetes we have to create 3 `FoundationDBCluster` resources. +**NOTE**: This is a limitation of the current approach not to read any information from the Kubernetes nodes and simplify the scheduling logic of the operator. +In the future, this might change and the deployment model for a three-data-hall FoundationDB cluster will be simplified. + +We have to start with a simple `FoundationDBCluster` that is running in one single availability zone, e.g. `az1`: + +```yaml +apiVersion: apps.foundationdb.org/v1beta2 +kind: FoundationDBCluster +metadata: + name: sample-cluster-az1 +spec: + version: 7.1.26 + spec: + processGroupIDPrefix: az1 + dataHall: az1 + databaseConfiguration: + redundancyMode: triple + processes: + general: + podTemplate: + spec: + nodeSelector: + "topology.kubernetes.io/zone": "az1" +``` + +Once the cluster is reconciled and running we can change the `redundancyMode` to `three_data_hall`. 
+For the other two `FoundationDBCluster` resources you have to set the `seedConnectionString` to the current connection string of the `FoundationDBCluster` resource in `az1`. +The cluster will be stuck in a reconciling state until all three configurations (one each for `az1`, `az2` and `az3`) have been applied; the configuration for `az1` looks like this: + +```yaml +apiVersion: apps.foundationdb.org/v1beta2 +kind: FoundationDBCluster +metadata: + name: sample-cluster-az1 +spec: + version: 7.1.26 + dataHall: az1 + processGroupIDPrefix: az1 + databaseConfiguration: + redundancyMode: three_data_hall + seedConnectionString: "" + processes: + general: + podTemplate: + spec: + nodeSelector: + "topology.kubernetes.io/zone": "az1" +``` + +Once all three `FoundationDBCluster` resources are fully reconciled, the FoundationDB cluster is up and running. +You can run this configuration in the same namespace, in different namespaces, or even across multiple Kubernetes clusters. + ## Multi-Region Replication The replication strategies above all describe how data is replicated within a data center. diff --git a/internal/locality/locality.go b/internal/locality/locality.go index da2cb928f..4b72ad976 100644 --- a/internal/locality/locality.go +++ b/internal/locality/locality.go @@ -155,6 +155,9 @@ func ChooseDistributedProcesses(cluster *fdbv1beta2.FoundationDBCluster, process fields := constraint.Fields if len(fields) == 0 { fields = []string{fdbv1beta2.FDBLocalityZoneIDKey, fdbv1beta2.FDBLocalityDCIDKey} + if cluster.Spec.DatabaseConfiguration.RedundancyMode == fdbv1beta2.RedundancyModeThreeDataHall { + fields = append(fields, fdbv1beta2.FDBLocalityDataHallKey) + } } chosenCounts := make(map[string]map[string]int, len(fields)) @@ -230,19 +233,34 @@ func ChooseDistributedProcesses(cluster *fdbv1beta2.FoundationDBCluster, process } } + if len(chosen) != count { + return chosen, notEnoughProcessesError{Desired: count, Chosen: len(chosen), Options: processes} + } + return chosen, nil } // GetHardLimits returns the distribution of localities. func GetHardLimits(cluster *fdbv1beta2.FoundationDBCluster) map[string]int { if cluster.Spec.DatabaseConfiguration.UsableRegions <= 1 { + if cluster.Spec.DatabaseConfiguration.RedundancyMode == fdbv1beta2.RedundancyModeThreeDataHall { + return map[string]int{ + // The assumption here is that we have 3 data halls and we want to spread the coordinators + // equally across those 3 data halls. + fdbv1beta2.FDBLocalityDataHallKey: 3, + fdbv1beta2.FDBLocalityZoneIDKey: 1, + } + } + return map[string]int{fdbv1beta2.FDBLocalityZoneIDKey: 1} } - // TODO (johscheuer): should we calculate that based on the number of DCs?
- maxCoordinatorsPerDC := int(math.Floor(float64(cluster.DesiredCoordinatorCount()) / 2.0)) + maxCoordinatorsPerDC := int(math.Ceil(float64(cluster.DesiredCoordinatorCount()) / float64(cluster.Spec.DatabaseConfiguration.CountUniqueDataCenters()))) - return map[string]int{fdbv1beta2.FDBLocalityZoneIDKey: 1, fdbv1beta2.FDBLocalityDCIDKey: maxCoordinatorsPerDC} + return map[string]int{ + fdbv1beta2.FDBLocalityZoneIDKey: 1, + fdbv1beta2.FDBLocalityDCIDKey: maxCoordinatorsPerDC, + } } // CheckCoordinatorValidity determines if the cluster's current coordinators diff --git a/internal/locality/locality_test.go b/internal/locality/locality_test.go index 59d2ddab5..31b14e358 100644 --- a/internal/locality/locality_test.go +++ b/internal/locality/locality_test.go @@ -118,7 +118,7 @@ func generateDefaultStatus(tls bool) *fdbv1beta2.FoundationDBStatus { } } -var _ = Describe("Change coordinators", func() { +var _ = Describe("Localities", func() { var cluster *fdbv1beta2.FoundationDBCluster BeforeEach(func() { @@ -361,17 +361,87 @@ var _ = Describe("Change coordinators", func() { fdbv1beta2.FDBLocalityZoneIDKey: 1, }, ), - Entry("default cluster with two usable regiosn", + Entry("default cluster with two usable regions and 4 DCs", &fdbv1beta2.FoundationDBCluster{ Spec: fdbv1beta2.FoundationDBClusterSpec{ DatabaseConfiguration: fdbv1beta2.DatabaseConfiguration{ UsableRegions: 2, + Regions: []fdbv1beta2.Region{ + { + DataCenters: []fdbv1beta2.DataCenter{ + { + ID: "dc1", + }, + { + ID: "dc2", + }, + }, + }, + { + DataCenters: []fdbv1beta2.DataCenter{ + { + ID: "dc3", + }, + { + ID: "dc4", + }, + }, + }, + }, + }, + }, + }, + map[string]int{ + fdbv1beta2.FDBLocalityZoneIDKey: 1, + fdbv1beta2.FDBLocalityDCIDKey: 3, + }, + ), + Entry("default cluster with two usable regions and 3 DCs", + &fdbv1beta2.FoundationDBCluster{ + Spec: fdbv1beta2.FoundationDBClusterSpec{ + DatabaseConfiguration: fdbv1beta2.DatabaseConfiguration{ + UsableRegions: 2, + Regions: []fdbv1beta2.Region{ + { + DataCenters: []fdbv1beta2.DataCenter{ + { + ID: "dc1", + }, + { + ID: "dc2", + }, + }, + }, + { + DataCenters: []fdbv1beta2.DataCenter{ + { + ID: "dc3", + }, + { + ID: "dc2", + }, + }, + }, + }, }, }, }, map[string]int{ fdbv1beta2.FDBLocalityZoneIDKey: 1, - fdbv1beta2.FDBLocalityDCIDKey: 4, + fdbv1beta2.FDBLocalityDCIDKey: 3, + }, + ), + Entry("default cluster with one usable region and three data hall", + &fdbv1beta2.FoundationDBCluster{ + Spec: fdbv1beta2.FoundationDBClusterSpec{ + DatabaseConfiguration: fdbv1beta2.DatabaseConfiguration{ + RedundancyMode: fdbv1beta2.RedundancyModeThreeDataHall, + }, + }, + }, + map[string]int{ + fdbv1beta2.FDBLocalityDataHallKey: 3, + fdbv1beta2.FDBLocalityZoneIDKey: 1, }, ), ) diff --git a/internal/monitor_conf.go b/internal/monitor_conf.go index a221467cb..660f44668 100644 --- a/internal/monitor_conf.go +++ b/internal/monitor_conf.go @@ -205,7 +205,7 @@ func GetMonitorProcessConfiguration(cluster *fdbv1beta2.FoundationDBCluster, pro }, }) configuration.Arguments = append(configuration.Arguments, monitorapi.Argument{ArgumentType: monitorapi.ConcatenateArgumentType, Values: []monitorapi.Argument{ - {Value: "--locality_process_id="}, + {Value: getKnobParameter(fdbv1beta2.FDBLocalityProcessIDKey, true)}, {ArgumentType: monitorapi.EnvironmentArgumentType, Source: "FDB_INSTANCE_ID"}, {Value: "-"}, {ArgumentType: monitorapi.ProcessNumberArgumentType}, @@ -216,15 +216,15 @@ func GetMonitorProcessConfiguration(cluster *fdbv1beta2.FoundationDBCluster, pro configuration.Arguments = 
append(configuration.Arguments, monitorapi.Argument{ArgumentType: monitorapi.ConcatenateArgumentType, Values: []monitorapi.Argument{ - {Value: "--locality_instance_id="}, + {Value: getKnobParameter(fdbv1beta2.FDBLocalityInstanceIDKey, true)}, {ArgumentType: monitorapi.EnvironmentArgumentType, Source: "FDB_INSTANCE_ID"}, }}, monitorapi.Argument{ArgumentType: monitorapi.ConcatenateArgumentType, Values: []monitorapi.Argument{ - {Value: "--locality_machineid="}, + {Value: getKnobParameter(fdbv1beta2.FDBLocalityMachineIDKey, true)}, {ArgumentType: monitorapi.EnvironmentArgumentType, Source: "FDB_MACHINE_ID"}, }}, monitorapi.Argument{ArgumentType: monitorapi.ConcatenateArgumentType, Values: []monitorapi.Argument{ - {Value: "--locality_zoneid="}, + {Value: getKnobParameter(fdbv1beta2.FDBLocalityZoneIDKey, true)}, {ArgumentType: monitorapi.EnvironmentArgumentType, Source: zoneVariable}, }}, ) @@ -234,7 +234,7 @@ func GetMonitorProcessConfiguration(cluster *fdbv1beta2.FoundationDBCluster, pro } if cluster.Spec.MainContainer.PeerVerificationRules != "" { - configuration.Arguments = append(configuration.Arguments, monitorapi.Argument{Value: fmt.Sprintf("--tls_verify_peers=%s", cluster.Spec.MainContainer.PeerVerificationRules)}) + configuration.Arguments = append(configuration.Arguments, monitorapi.Argument{Value: getKnobParameterWithValue("tls_verify_peers", cluster.Spec.MainContainer.PeerVerificationRules, false)}) } podSettings := cluster.GetProcessSettings(processClass) @@ -254,11 +254,11 @@ func GetMonitorProcessConfiguration(cluster *fdbv1beta2.FoundationDBCluster, pro } if cluster.Spec.DataCenter != "" { - configuration.Arguments = append(configuration.Arguments, monitorapi.Argument{Value: fmt.Sprintf("--locality_dcid=%s", cluster.Spec.DataCenter)}) + configuration.Arguments = append(configuration.Arguments, monitorapi.Argument{Value: getKnobParameterWithValue(fdbv1beta2.FDBLocalityDCIDlKey, cluster.Spec.DataCenter, true)}) } if cluster.Spec.DataHall != "" { - configuration.Arguments = append(configuration.Arguments, monitorapi.Argument{Value: fmt.Sprintf("--locality_data_hall=%s", cluster.Spec.DataHall)}) + configuration.Arguments = append(configuration.Arguments, monitorapi.Argument{Value: getKnobParameterWithValue(fdbv1beta2.FDBLocalityDataHallKey, cluster.Spec.DataHall, true)}) } if cluster.DefineDNSLocalityFields() { @@ -271,6 +271,25 @@ func GetMonitorProcessConfiguration(cluster *fdbv1beta2.FoundationDBCluster, pro return configuration, nil } +// getKnobParameter will return the knob parameter with a trailing =. If the provided knob is a locality the key will be +// prefixed with "locality_". +func getKnobParameter(key string, isLocality bool) string { + var sb strings.Builder + sb.WriteString("--") + if isLocality { + sb.WriteString("locality_") + } + sb.WriteString(key) + sb.WriteString("=") + + return sb.String() +} + +// getKnobParameterWithValue is the same as getKnobParameter but will append the value at the end. +func getKnobParameterWithValue(key string, value string, isLocality bool) string { + return getKnobParameter(key, isLocality) + value +} + // buildIPArgument builds an argument that takes an IP address from an environment variable func buildIPArgument(parameter string, environmentVariable string, imageType FDBImageType, sampleAddresses []fdbv1beta2.ProcessAddress) []monitorapi.Argument { var leftIPWrap string
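For reference (not part of the patch), the new helpers simply rebuild the flag strings that were previously produced with `fmt.Sprintf`. A minimal sketch using a copy of the helper logic above:

```go
package main

import "fmt"

// Copy of the helper added in monitor_conf.go, for illustration only: it prefixes
// locality keys with "locality_" and appends a trailing "=".
func getKnobParameter(key string, isLocality bool) string {
	prefix := "--"
	if isLocality {
		prefix += "locality_"
	}
	return prefix + key + "="
}

func main() {
	fmt.Println(getKnobParameter("data_hall", true) + "az1") // --locality_data_hall=az1
	fmt.Println(getKnobParameter("dcid", true) + "dc1")      // --locality_dcid=dc1
	fmt.Println(getKnobParameter("tls_verify_peers", false)) // --tls_verify_peers=
}
```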