Skip to content

Commit

Permalink
Initial support for three data hall replication
Browse files Browse the repository at this point in the history
  • Loading branch information
johscheuer committed May 30, 2023
1 parent 7ec1d8e commit fe562f1
Show file tree
Hide file tree
Showing 17 changed files with 543 additions and 33 deletions.
34 changes: 31 additions & 3 deletions api/v1beta2/foundationdb_database_configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import (
type DatabaseConfiguration struct {
// RedundancyMode defines the core replication factor for the database.
// +kubebuilder:validation:Optional
// +kubebuilder:validation:Enum=single;double;triple
// +kubebuilder:validation:Enum=single;double;triple;three_data_hall
// +kubebuilder:default:double
RedundancyMode RedundancyMode `json:"redundancy_mode,omitempty"`

Expand Down Expand Up @@ -211,6 +211,19 @@ func (configuration DatabaseConfiguration) NormalizeConfiguration() DatabaseConf
return *result
}

// CountUniqueDataCenters reports how many distinct data center IDs appear
// across all regions of the desired DatabaseConfiguration. A data center ID
// that is listed in more than one region is only counted once.
func (configuration *DatabaseConfiguration) CountUniqueDataCenters() int {
	// The map is used as a set: only the keys matter.
	seenIDs := make(map[string]None)

	for _, currentRegion := range configuration.Regions {
		for _, dataCenter := range currentRegion.DataCenters {
			seenIDs[dataCenter.ID] = None{}
		}
	}

	return len(seenIDs)
}

// NormalizeConfigurationWithSeparatedProxies ensures a standardized
// format and defaults when comparing database configuration in the
// cluster spec with database configuration in the cluster status,
Expand Down Expand Up @@ -284,7 +297,7 @@ func (configuration *DatabaseConfiguration) GetRoleCountsWithDefaults(version Ve
counts.Storage = 2*faultTolerance + 1
}
if counts.Logs == 0 {
counts.Logs = 3
counts.Logs = configuration.RedundancyMode.getDefaultLogCount()
}

if version.HasSeparatedProxies() {
Expand Down Expand Up @@ -597,9 +610,11 @@ func (configuration DatabaseConfiguration) FillInDefaultsFromStatus() DatabaseCo
if result.RemoteLogs == 0 {
result.RemoteLogs = -1
}

if result.LogRouters == 0 {
result.LogRouters = -1
}

return *result
}

Expand Down Expand Up @@ -678,7 +693,7 @@ func DesiredFaultTolerance(redundancyMode RedundancyMode) int {
return 0
case RedundancyModeDouble, RedundancyModeUnset:
return 1
case RedundancyModeTriple:
case RedundancyModeTriple, RedundancyModeThreeDataHall:
return 2
default:
return 0
Expand All @@ -694,6 +709,8 @@ func MinimumFaultDomains(redundancyMode RedundancyMode) int {
return 2
case RedundancyModeTriple:
return 3
case RedundancyModeThreeDataHall:
return 4
default:
return 1
}
Expand All @@ -710,6 +727,8 @@ const (
RedundancyModeDouble RedundancyMode = "double"
// RedundancyModeTriple defines the replication factor 3.
RedundancyModeTriple RedundancyMode = "triple"
// RedundancyModeThreeDataHall defines the replication factor three_data_hall.
RedundancyModeThreeDataHall RedundancyMode = "three_data_hall"
// RedundancyModeOneSatelliteSingle defines the replication factor one_satellite_single.
RedundancyModeOneSatelliteSingle RedundancyMode = "one_satellite_single"
// RedundancyModeOneSatelliteDouble defines the replication factor one_satellite_double.
Expand All @@ -718,6 +737,15 @@ const (
RedundancyModeUnset RedundancyMode = ""
)

// getDefaultLogCount reports the number of logs used by default for this
// redundancy mode: 4 for three_data_hall and 3 for every other mode.
func (redundancyMode RedundancyMode) getDefaultLogCount() int {
	switch redundancyMode {
	case RedundancyModeThreeDataHall:
		return 4
	default:
		return 3
	}
}

// StorageEngine defines the storage engine for the database
// +kubebuilder:validation:MaxLength=100
type StorageEngine string
Expand Down
36 changes: 36 additions & 0 deletions api/v1beta2/foundationdb_database_configuration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,42 @@ var _ = Describe("DatabaseConfiguration", func() {
})
})

// Checks the process counts that GetProcessCountsWithDefaults returns for a
// cluster that uses the three_data_hall redundancy mode and does not set
// explicit log or storage counts.
When("a three_data_hall cluster with the default values is provided", func() {
var cluster *FoundationDBCluster

BeforeEach(func() {
cluster = &FoundationDBCluster{
Spec: FoundationDBClusterSpec{
Version: "7.1.33",
DataHall: "az1",
ProcessCounts: ProcessCounts{
Stateless: -1,
},
DatabaseConfiguration: DatabaseConfiguration{
StorageEngine: StorageEngineSSD,
RedundancyMode: RedundancyModeThreeDataHall,
UsableRegions: 1,
},
},
}
})

When("getting the default process counts", func() {
var err error
var counts ProcessCounts

BeforeEach(func() {
counts, err = cluster.GetProcessCountsWithDefaults()
})

It("It should calculate the default process counts", func() {
Expect(err).NotTo(HaveOccurred())
Expect(counts.Log).To(BeNumerically("==", 6)) // 4 required + 2 additional
// Storage defaults to 2F + 1, and three_data_hall has a desired fault
// tolerance F of 2.
Expect(counts.Storage).To(BeNumerically("==", 5))
})
})
})

When("using ProcessCounts", func() {
When("calculating the total number of processes", func() {
var counts ProcessCounts
Expand Down
12 changes: 12 additions & 0 deletions api/v1beta2/foundationdb_labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ const (
// the zone ID.
FDBLocalityZoneIDKey = "zoneid"

// FDBLocalityMachineIDKey represents the key in the locality map that holds
// the machine ID.
FDBLocalityMachineIDKey = "machineid"

// FDBLocalityDCIDKey represents the key in the locality map that holds
// the DC ID.
FDBLocalityDCIDKey = "dcid"
Expand All @@ -76,4 +80,12 @@ const (
// FDBLocalityProcessIDKey represents the key in the locality map that
// holds the process ID.
FDBLocalityProcessIDKey = "process_id"

// FDBLocalityDataHallKey represents the key in the locality map that holds
// the data hall.
FDBLocalityDataHallKey = "data_hall"

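// FDBLocalityDCIDlKey represents the key in the locality map that holds
// the data center ID. It has the same value as FDBLocalityDCIDKey.
// NOTE(review): the trailing "l" in "DCIDl" looks like a typo; prefer FDBLocalityDCIDKey.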
FDBLocalityDCIDlKey = "dcid"
)
7 changes: 3 additions & 4 deletions api/v1beta2/foundationdbcluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -1127,7 +1127,7 @@ func (cluster *FoundationDBCluster) GetProcessSettings(processClass ProcessClass
// The default Storage value will be 2F + 1, where F is the cluster's fault
// tolerance.
//
// The default Logs value will be 3.
// The default Logs value will be 3, or 4 when the redundancy mode is three_data_hall.
//
// The default Proxies value will be 3.
//
Expand Down Expand Up @@ -1289,10 +1289,9 @@ func (cluster *FoundationDBCluster) MinimumFaultDomains() int {
return MinimumFaultDomains(cluster.Spec.DatabaseConfiguration.RedundancyMode)
}

// DesiredCoordinatorCount returns the number of coordinators to recruit for
// a cluster.
// DesiredCoordinatorCount returns the number of coordinators to recruit for a cluster.
func (cluster *FoundationDBCluster) DesiredCoordinatorCount() int {
if cluster.Spec.DatabaseConfiguration.UsableRegions > 1 {
if cluster.Spec.DatabaseConfiguration.UsableRegions > 1 || cluster.Spec.DatabaseConfiguration.RedundancyMode == RedundancyModeThreeDataHall {
return 9
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10079,6 +10079,7 @@ spec:
- single
- double
- triple
- three_data_hall
maxLength: 100
type: string
regions:
Expand Down Expand Up @@ -13704,6 +13705,7 @@ spec:
- single
- double
- triple
- three_data_hall
maxLength: 100
type: string
regions:
Expand Down
21 changes: 21 additions & 0 deletions config/tests/three_data_hall/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Three-Data-Hall example

This example requires that your Kubernetes cluster has nodes which are labeled with `topology.kubernetes.io/zone`.
The example requires at least 3 unique zones. For testing, the zones can be faked by adding the label to the nodes manually.
If you want to use cloud provider specific zone label values you can set the `AZ1`, `AZ2` and `AZ3` environment variables.

## Create the Three-Data-Hall cluster

This will bring up an FDB cluster using the three-data-hall redundancy mode.

```bash
./create.bash
```

## Delete

This will remove all created resources:

```bash
./delete.bash
```
34 changes: 34 additions & 0 deletions config/tests/three_data_hall/create.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env bash

set -eu

# This directory provides an example of creating a cluster using the three_data_hall
# replication topology.
#
# This example is built for local testing, so it will create all of the Pods
# within a single Kubernetes cluster, but will give false locality information for the zones
# to make the processes believe they are in different locations.
#
# You can use this script to bootstrap the cluster. Once it finishes, you can
# make changes to the cluster by editing the final.yaml file and running the
# apply.bash script. You can clean up the clusters by running the delete.bash
# script.

# Resolve the directory of this script. dirname is used instead of
# "${BASH_SOURCE%/*}" because the latter returns the script name itself when the
# script is started without a slash in its path (e.g. "bash create.bash").
DIR="$(dirname "${BASH_SOURCE[0]}")"

. "${DIR}/functions.bash"

# The zone names can be overridden through the environment.
AZ1=${AZ1:-"az1"}
AZ2=${AZ2:-"az2"}
AZ3=${AZ3:-"az3"}

# Stage 1: create the first cluster with an empty seed connection string and
# wait until it is reconciled, then read its connection string.
applyFile "${DIR}/stage_1.yaml" "${AZ1}" '""'
checkReconciliationLoop "test-cluster-${AZ1}"
connectionString=$(getConnectionString "test-cluster-${AZ1}")

# Final stage: apply the three_data_hall manifest for every zone, seeded with
# the connection string of the first cluster.
applyFile "${DIR}/final.yaml" "${AZ1}" "${connectionString}"
applyFile "${DIR}/final.yaml" "${AZ2}" "${connectionString}"
applyFile "${DIR}/final.yaml" "${AZ3}" "${connectionString}"

checkReconciliationLoop "test-cluster-${AZ1}"
checkReconciliationLoop "test-cluster-${AZ2}"
checkReconciliationLoop "test-cluster-${AZ3}"
5 changes: 5 additions & 0 deletions config/tests/three_data_hall/delete.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/usr/bin/env bash

set -eu

# Remove every fdb resource that carries the cluster-group=test-cluster label,
# which is the label final.yaml and stage_1.yaml put on the clusters they define.
kubectl delete fdb -l cluster-group=test-cluster
40 changes: 40 additions & 0 deletions config/tests/three_data_hall/final.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# This file provides an example of a cluster you can run in a local testing
# environment to create a simulation of a three_data_hall cluster.
#
# This requires variables to be interpolated for $az and $connectionString
apiVersion: apps.foundationdb.org/v1beta2
kind: FoundationDBCluster
metadata:
labels:
cluster-group: test-cluster
name: test-cluster-$az
spec:
version: 7.1.26
faultDomain:
key: foundationdb.org/none
processGroupIDPrefix: $az
dataHall: $az
processCounts:
stateless: -1
seedConnectionString: $connectionString
databaseConfiguration:
redundancy_mode: "three_data_hall"
processes:
general:
customParameters:
- "knob_disable_posix_kernel_aio=1"
volumeClaimTemplate:
spec:
resources:
requests:
storage: "16G"
podTemplate:
spec:
nodeSelector:
"topology.kubernetes.io/zone": "$az"
containers:
- name: foundationdb
resources:
requests:
cpu: 250m
memory: 128Mi
29 changes: 29 additions & 0 deletions config/tests/three_data_hall/functions.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# applyFile renders a manifest template with envsubst and applies the result
# with kubectl.
#   $1 - path to the manifest template
#   $2 - value made available to envsubst as $az
#   $3 - value made available to envsubst as $connectionString
function applyFile() {
    # Keep the helper variables local so they do not leak into the script that
    # sources this file.
    local manifest="${1}"
    local az="${2}"
    local connectionString="${3}"

    az="${az}" connectionString="${connectionString}" envsubst < "${manifest}" | kubectl apply -f -
}

# checkReconciliation checks whether the given fdb resource still has to be
# reconciled.
#   $1 - name of the fdb resource
# The return status acts as a "keep waiting" flag for checkReconciliationLoop:
#   0 - fewer than two generation values were read, or .metadata.generation
#       differs from .status.generations.reconciled; the values are printed
#   1 - .metadata.generation equals .status.generations.reconciled
function checkReconciliation() {
    # Keep the helper variables local so they do not leak into the script that
    # sources this file.
    local clusterName="${1}"
    local generationsOutput
    local -a generations

    generationsOutput=$(kubectl get fdb "${clusterName}" -o jsonpath='{.metadata.generation} {.status.generations.reconciled}')
    read -ra generations <<< "${generationsOutput}"
    if [[ ("${#generations[@]}" -ge 2) && ("${generations[0]}" == "${generations[1]}") ]]; then
        return 1
    fi

    echo "Latest generations for $clusterName: $generationsOutput"
    return 0
}

# getConnectionString prints the value of .status.connectionString of the fdb
# resource whose name is passed as $1.
function getConnectionString() {
    local clusterName="${1}"

    kubectl get fdb "${clusterName}" -o jsonpath='{.status.connectionString}'
}

# checkReconciliationLoop blocks until checkReconciliation returns a non-zero
# status for the fdb resource named by $1, polling every 5 seconds.
function checkReconciliationLoop() {
    local clusterName="${1}"

    while true; do
        # A non-zero status means there is nothing left to wait for.
        checkReconciliation "${clusterName}" || break
        echo "Waiting for reconciliation"
        sleep 5
    done
}
40 changes: 40 additions & 0 deletions config/tests/three_data_hall/stage_1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# This file provides an example of a cluster you can run in a local testing
# environment to create a simulation of a three_data_hall cluster.
#
# This requires variables to be interpolated for $az and $connectionString
apiVersion: apps.foundationdb.org/v1beta2
kind: FoundationDBCluster
metadata:
labels:
cluster-group: test-cluster
name: test-cluster-$az
spec:
version: 7.1.26
faultDomain:
key: foundationdb.org/none
processGroupIDPrefix: $az
dataHall: $az
processCounts:
stateless: -1
seedConnectionString: $connectionString
databaseConfiguration:
redundancy_mode: "triple"
processes:
general:
customParameters:
- "knob_disable_posix_kernel_aio=1"
volumeClaimTemplate:
spec:
resources:
requests:
storage: "16G"
podTemplate:
spec:
nodeSelector:
"topology.kubernetes.io/zone": "$az"
containers:
- name: foundationdb
resources:
requests:
cpu: 250m
memory: 128Mi
2 changes: 1 addition & 1 deletion controllers/change_coordinators.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ func selectCoordinators(logger logr.Logger, cluster *fdbv1beta2.FoundationDBClus
HardLimits: locality.GetHardLimits(cluster),
})

logger.Info("Current coordinators", "coordinators", coordinators)
logger.Info("Current coordinators", "coordinators", coordinators, "coordinatorCount", coordinatorCount)
if err != nil {
return candidates, err
}
Expand Down
Loading

0 comments on commit fe562f1

Please sign in to comment.