Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial support for three data hall replication #1651

Merged
merged 10 commits into from
Oct 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 31 additions & 3 deletions api/v1beta2/foundationdb_database_configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import (
type DatabaseConfiguration struct {
// RedundancyMode defines the core replication factor for the database.
// +kubebuilder:validation:Optional
// +kubebuilder:validation:Enum=single;double;triple
// +kubebuilder:validation:Enum=single;double;triple;three_data_hall
// +kubebuilder:default:double
RedundancyMode RedundancyMode `json:"redundancy_mode,omitempty"`

Expand Down Expand Up @@ -211,6 +211,19 @@ func (configuration DatabaseConfiguration) NormalizeConfiguration() DatabaseConf
return *result
}

// CountUniqueDataCenters reports how many distinct data center IDs are referenced across all
// regions of the desired DatabaseConfiguration. An ID that appears more than once is counted once.
func (configuration *DatabaseConfiguration) CountUniqueDataCenters() int {
	seenIDs := make(map[string]None)

	for regionIdx := range configuration.Regions {
		dataCenters := configuration.Regions[regionIdx].DataCenters
		for dcIdx := range dataCenters {
			// The map is used as a set, only the keys matter.
			seenIDs[dataCenters[dcIdx].ID] = None{}
		}
	}

	return len(seenIDs)
}

// NormalizeConfigurationWithSeparatedProxies ensures a standardized
// format and defaults when comparing database configuration in the
// cluster spec with database configuration in the cluster status,
Expand Down Expand Up @@ -284,7 +297,7 @@ func (configuration *DatabaseConfiguration) GetRoleCountsWithDefaults(version Ve
counts.Storage = 2*faultTolerance + 1
}
if counts.Logs == 0 {
counts.Logs = 3
counts.Logs = configuration.RedundancyMode.getDefaultLogCount()
}

if version.HasSeparatedProxies() {
Expand Down Expand Up @@ -597,9 +610,11 @@ func (configuration DatabaseConfiguration) FillInDefaultsFromStatus() DatabaseCo
if result.RemoteLogs == 0 {
result.RemoteLogs = -1
}

if result.LogRouters == 0 {
result.LogRouters = -1
}

return *result
}

Expand Down Expand Up @@ -678,7 +693,7 @@ func DesiredFaultTolerance(redundancyMode RedundancyMode) int {
return 0
case RedundancyModeDouble, RedundancyModeUnset:
return 1
case RedundancyModeTriple:
case RedundancyModeTriple, RedundancyModeThreeDataHall:
return 2
default:
return 0
Expand All @@ -694,6 +709,8 @@ func MinimumFaultDomains(redundancyMode RedundancyMode) int {
return 2
case RedundancyModeTriple:
return 3
case RedundancyModeThreeDataHall:
return 4
default:
return 1
}
Expand All @@ -710,6 +727,8 @@ const (
RedundancyModeDouble RedundancyMode = "double"
// RedundancyModeTriple defines the replication factor 3.
RedundancyModeTriple RedundancyMode = "triple"
// RedundancyModeThreeDataHall defines the replication factor three_data_hall.
RedundancyModeThreeDataHall RedundancyMode = "three_data_hall"
johscheuer marked this conversation as resolved.
Show resolved Hide resolved
// RedundancyModeOneSatelliteSingle defines the replication factor one_satellite_single.
RedundancyModeOneSatelliteSingle RedundancyMode = "one_satellite_single"
// RedundancyModeOneSatelliteDouble defines the replication factor one_satellite_double.
Expand All @@ -718,6 +737,15 @@ const (
RedundancyModeUnset RedundancyMode = ""
)

// getDefaultLogCount yields the log count that is used when none is configured:
// 4 for the three_data_hall redundancy mode and 3 for every other redundancy mode.
func (redundancyMode RedundancyMode) getDefaultLogCount() int {
	switch redundancyMode {
	case RedundancyModeThreeDataHall:
		return 4
	default:
		return 3
	}
}

// StorageEngine defines the storage engine for the database
// +kubebuilder:validation:MaxLength=100
type StorageEngine string
Expand Down
36 changes: 36 additions & 0 deletions api/v1beta2/foundationdb_database_configuration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,42 @@ var _ = Describe("DatabaseConfiguration", func() {
})
})

// Checks the process counts that are derived for a cluster spec using the
// three_data_hall redundancy mode when no log or storage process counts are set in the spec.
When("a three_data_hall cluster with the default values is provided", func() {
var cluster *FoundationDBCluster

BeforeEach(func() {
// Only the stateless process count is set explicitly; the log and storage
// counts are left at their zero value.
cluster = &FoundationDBCluster{
Spec: FoundationDBClusterSpec{
Version: "7.1.33",
DataHall: "az1",
ProcessCounts: ProcessCounts{
Stateless: -1,
},
DatabaseConfiguration: DatabaseConfiguration{
StorageEngine: StorageEngineSSD,
RedundancyMode: RedundancyModeThreeDataHall,
UsableRegions: 1,
},
},
}
})

When("getting the default process counts", func() {
var err error
var counts ProcessCounts

// Compute the counts once per spec so the It block only contains assertions.
BeforeEach(func() {
counts, err = cluster.GetProcessCountsWithDefaults()
})

It("It should calculate the default process counts", func() {
Expect(err).NotTo(HaveOccurred())
Expect(counts.Log).To(BeNumerically("==", 6)) // 4 required + 2 additional
// NOTE(review): presumably 2*F + 1 with a desired fault tolerance F of 2 for
// three_data_hall; confirm against GetProcessCountsWithDefaults.
Expect(counts.Storage).To(BeNumerically("==", 5))
})
})
})

When("using ProcessCounts", func() {
When("calculating the total number of processes", func() {
var counts ProcessCounts
Expand Down
12 changes: 12 additions & 0 deletions api/v1beta2/foundationdb_labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ const (
// the zone ID.
FDBLocalityZoneIDKey = "zoneid"

// FDBLocalityMachineIDKey represents the key in the locality map that holds
// the machine ID.
FDBLocalityMachineIDKey = "machineid"

// FDBLocalityDCIDKey represents the key in the locality map that holds
// the DC ID.
FDBLocalityDCIDKey = "dcid"
Expand All @@ -86,4 +90,12 @@ const (

// FDBLocalityExclusionPrefix represents the exclusion prefix for locality based exclusions.
FDBLocalityExclusionPrefix = "locality_instance_id"

// FDBLocalityDataHallKey represents the key in the locality map that holds
// the data hall.
FDBLocalityDataHallKey = "data_hall"

// FDBLocalityDCIDlKey represents the key in the locality map that holds
// the data center ID. It has the same value as FDBLocalityDCIDKey.
FDBLocalityDCIDlKey = "dcid"
)
7 changes: 3 additions & 4 deletions api/v1beta2/foundationdbcluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -1268,7 +1268,7 @@ func (cluster *FoundationDBCluster) GetProcessSettings(processClass ProcessClass
// The default Storage value will be 2F + 1, where F is the cluster's fault
// tolerance.
//
// The default Logs value will be 3.
// The default Logs value will be 3, or 4 for the three_data_hall redundancy mode.
//
// The default Proxies value will be 3.
//
Expand Down Expand Up @@ -1430,10 +1430,9 @@ func (cluster *FoundationDBCluster) MinimumFaultDomains() int {
return MinimumFaultDomains(cluster.Spec.DatabaseConfiguration.RedundancyMode)
}

// DesiredCoordinatorCount returns the number of coordinators to recruit for
// a cluster.
// DesiredCoordinatorCount returns the number of coordinators to recruit for a cluster.
func (cluster *FoundationDBCluster) DesiredCoordinatorCount() int {
if cluster.Spec.DatabaseConfiguration.UsableRegions > 1 {
if cluster.Spec.DatabaseConfiguration.UsableRegions > 1 || cluster.Spec.DatabaseConfiguration.RedundancyMode == RedundancyModeThreeDataHall {
return 9
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10355,6 +10355,7 @@ spec:
- single
- double
- triple
- three_data_hall
maxLength: 100
type: string
regions:
Expand Down Expand Up @@ -14090,6 +14091,7 @@ spec:
- single
- double
- triple
- three_data_hall
maxLength: 100
type: string
regions:
Expand Down
21 changes: 21 additions & 0 deletions config/tests/three_data_hall/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Three-Data-Hall example

This example requires that your Kubernetes cluster has nodes which are labeled with `topology.kubernetes.io/zone`.
The example requires at least 3 unique zones. For testing, these can be faked by adding the label with different values to your nodes.
If you want to use cloud provider specific zone label values, you can set the `AZ1`, `AZ2` and `AZ3` environment variables (they default to `az1`, `az2` and `az3`).

## Create the Three-Data-Hall cluster

This will bring up a FDB cluster using the three-data-hall redundancy mode.

```bash
./create.bash
```

## Delete

This will remove all created resources:

```bash
./delete.bash
```
34 changes: 34 additions & 0 deletions config/tests/three_data_hall/create.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env bash

set -eu

# This directory provides an example of creating a cluster using the three_data_hall
# replication topology.
#
# This example is built for local testing, so it will create all of the Pods
# within a single Kubernetes cluster, but will give false locality information for the zones
# to make the processes believe they are in different locations.
#
# You can use this script to bootstrap the cluster. Once it finishes, you can
# make changes to the cluster by editing the final.yaml file and running the
# apply.bash script. You can clean up the clusters by running the delete.bash
# script.

# Resolve the directory that contains this script. Unlike "${BASH_SOURCE%/*}" this
# also works when the script is started without a path component (e.g. `bash create.bash`),
# where the suffix removal would leave the script name instead of a directory.
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

. "${DIR}/functions.bash"

# The zone names can be overridden through the environment.
AZ1=${AZ1:-"az1"}
AZ2=${AZ2:-"az2"}
AZ3=${AZ3:-"az3"}

# Stage 1: create the first cluster with an empty seed connection string, wait until
# it is reconciled and fetch the connection string it reports.
applyFile "${DIR}/stage_1.yaml" "${AZ1}" '""'
checkReconciliationLoop "test-cluster-${AZ1}"
connectionString=$(getConnectionString "test-cluster-${AZ1}")

# Final stage: apply the final manifest for all three zones using that connection string.
applyFile "${DIR}/final.yaml" "${AZ1}" "${connectionString}"
applyFile "${DIR}/final.yaml" "${AZ2}" "${connectionString}"
applyFile "${DIR}/final.yaml" "${AZ3}" "${connectionString}"

# Wait until every cluster resource is reconciled.
checkReconciliationLoop "test-cluster-${AZ1}"
checkReconciliationLoop "test-cluster-${AZ2}"
checkReconciliationLoop "test-cluster-${AZ3}"
5 changes: 5 additions & 0 deletions config/tests/three_data_hall/delete.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/usr/bin/env bash

set -eu

# Delete every fdb resource that carries the cluster-group=test-cluster label,
# which is the label set by the stage_1.yaml and final.yaml manifests in this directory.
kubectl delete fdb -l cluster-group=test-cluster
40 changes: 40 additions & 0 deletions config/tests/three_data_hall/final.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# This file provides an example of a cluster you can run in a local testing
# environment to create a simulation of a three_data_hall cluster.
#
# This requires variables to be interpolated for $az and $connectionString
apiVersion: apps.foundationdb.org/v1beta2
kind: FoundationDBCluster
metadata:
labels:
cluster-group: test-cluster
name: test-cluster-$az
spec:
version: 7.1.26
faultDomain:
key: foundationdb.org/none
processGroupIDPrefix: $az
dataHall: $az
processCounts:
stateless: -1
seedConnectionString: $connectionString
databaseConfiguration:
redundancy_mode: "three_data_hall"
processes:
general:
customParameters:
- "knob_disable_posix_kernel_aio=1"
volumeClaimTemplate:
spec:
resources:
requests:
storage: "16G"
podTemplate:
spec:
nodeSelector:
"topology.kubernetes.io/zone": "$az"
containers:
- name: foundationdb
resources:
requests:
cpu: 250m
memory: 128Mi
29 changes: 29 additions & 0 deletions config/tests/three_data_hall/functions.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# applyFile renders a manifest template with envsubst and pipes the result into
# `kubectl apply`.
#
# Arguments:
#   $1 - path of the manifest template
#   $2 - value made available to envsubst as $az
#   $3 - value made available to envsubst as $connectionString
function applyFile() {
  # Keep the helper variables local so they do not leak into the sourcing script.
  local manifest="${1}"
  local az="${2}"
  local connectionString="${3}"

  # The assignments in front of envsubst export both values only to that command.
  az="${az}" connectionString="${connectionString}" envsubst < "${manifest}" | kubectl apply -f -
}

# checkReconciliation compares .metadata.generation with .status.generations.reconciled
# of the given fdb resource.
#
# Arguments:
#   $1 - name of the fdb resource
#
# Returns:
#   1 when both generations were read and are equal (the resource is reconciled),
#   0 otherwise, after printing the generations that were read.
# Note that this is inverted compared to the usual shell convention, so that the
# function can be used directly as the condition of a "wait while not reconciled" loop.
function checkReconciliation() {
  # Keep all helper variables local so they do not leak into the sourcing script.
  local clusterName="${1}"
  local generationsOutput
  local -a generations=()

  generationsOutput=$(kubectl get fdb "${clusterName}" -o jsonpath='{.metadata.generation} {.status.generations.reconciled}')
  read -ra generations <<< "${generationsOutput}"

  # Fewer than two fields means the reconciled generation is not reported yet.
  if [[ ("${#generations[@]}" -ge 2) && ("${generations[0]}" == "${generations[1]}") ]]; then
    return 1
  fi

  echo "Latest generations for $clusterName: $generationsOutput"
  return 0
}

# getConnectionString prints the .status.connectionString field of the fdb
# resource whose name is passed as the first argument.
function getConnectionString() {
  local clusterName="${1}"

  kubectl get fdb "${clusterName}" -o jsonpath='{.status.connectionString}'
}

# checkReconciliationLoop blocks until checkReconciliation reports a non-zero status
# for the fdb resource named by the first argument, polling every 5 seconds.
function checkReconciliationLoop() {
  local clusterName="${1}"

  # checkReconciliation returns 0 while the resource still has to be waited for.
  until ! checkReconciliation "${clusterName}"; do
    echo "Waiting for reconciliation"
    sleep 5
  done
}
40 changes: 40 additions & 0 deletions config/tests/three_data_hall/stage_1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# This file provides the first stage of an example you can run in a local testing
# environment to create a simulation of a three_data_hall cluster. This stage
# uses the triple redundancy mode; final.yaml uses three_data_hall.
#
# This requires variables to be interpolated for $az and $connectionString
apiVersion: apps.foundationdb.org/v1beta2
kind: FoundationDBCluster
metadata:
labels:
cluster-group: test-cluster
name: test-cluster-$az
spec:
version: 7.1.26
faultDomain:
key: foundationdb.org/none
processGroupIDPrefix: $az
dataHall: $az
processCounts:
stateless: -1
seedConnectionString: $connectionString
databaseConfiguration:
redundancy_mode: "triple"
processes:
general:
customParameters:
- "knob_disable_posix_kernel_aio=1"
volumeClaimTemplate:
spec:
resources:
requests:
storage: "16G"
podTemplate:
spec:
nodeSelector:
"topology.kubernetes.io/zone": "$az"
containers:
- name: foundationdb
resources:
requests:
cpu: 250m
memory: 128Mi
2 changes: 1 addition & 1 deletion controllers/change_coordinators.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ func selectCoordinators(logger logr.Logger, cluster *fdbv1beta2.FoundationDBClus
HardLimits: locality.GetHardLimits(cluster),
})

logger.Info("Current coordinators", "coordinators", coordinators)
logger.Info("Current coordinators", "coordinators", coordinators, "error", err)
if err != nil {
return candidates, err
}
Expand Down
Loading
Loading