Skip to content

Commit

Permalink
MESH-1743 Merge pull request istio-ecosystem#133 from services-mesh/M…
Browse files Browse the repository at this point in the history
…ESH-1743-DR-bk

MESH-1743-DR
  • Loading branch information
sa authored and GitHub Enterprise committed Jun 27, 2022
2 parents f1a43a4 + b8dd675 commit 47c0dab
Show file tree
Hide file tree
Showing 31 changed files with 1,048 additions and 97 deletions.
1 change: 0 additions & 1 deletion DESIGN.md
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
# Admiral

2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -167,4 +167,4 @@ gen-yaml:
cp ./install/sample/gtp_topology.yaml ./out/yaml/gtp_topology.yaml
cp ./install/sample/grpc-client.yaml ./out/yaml/grpc-client.yaml
cp ./install/prometheus/prometheus.yaml ./out/yaml/prometheus.yaml
cp ./install/scripts/*.sh ./out/scripts/
cp ./install/scripts/*.sh ./out/scripts/
4 changes: 4 additions & 0 deletions admiral/cmd/admiral/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ func GetRootCmd(args []string) *cobra.Command {
rootCmd.PersistentFlags().StringVar(&params.SecretResolverConfigPath, "secret_resolver_config_path", "/etc/config/resolver_config.yaml",
"Path to the secret resolver config")
rootCmd.PersistentFlags().BoolVar(&params.MetricsEnabled, "metrics", true, "Enable prometheus metrics collections")
rootCmd.PersistentFlags().StringVar(&params.AdmiralStateCheckerName,"admiral_state_checker_name","NoOPStateChecker","The value of the admiral_state_checker_name label to configure the DR Strategy for Admiral")
rootCmd.PersistentFlags().StringVar(&params.DRStateStoreConfigPath,"dr_state_store_config_path","","Location of config file which has details for data store. Ex:- Dynamo DB connection details")
rootCmd.PersistentFlags().StringVar(&params.ServiceEntryIPPrefix,"se_ip_prefix","240.0","IP prefix for the auto generated IPs for service entries. Only the first two octets: Eg- 240.0")


return rootCmd
}
Expand Down
34 changes: 31 additions & 3 deletions admiral/pkg/apis/admiral/routes/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"encoding/json"
"fmt"
"net/http"
"strconv"
"strings"

"github.com/gorilla/mux"
Expand All @@ -27,10 +28,37 @@ type IdentityServiceEntry struct {
ClusterNames []string `json:"Clusters,omitempty"`
}

func (opts *RouteOpts) ReturnSuccessGET(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(200)
response := fmt.Sprintf("Heath check method called: %v, URI: %v, Method: %v\n", r.Host, r.RequestURI, r.Method)
/*
We expect the DNS health checker to include the query param checkifreadonly with value set to true.
The query param is used to check if the current Admiral instance is running in Active Mode or Passive Mode (also called read only mode).
If Running in passive mode, the health check returns 502 which forces DNS lookup to always return reference to Admiral in Active state.
*/

func (opts *RouteOpts) ReturnSuccessGET(w http.ResponseWriter, r *http.Request) {
allQueryParams:= r.URL.Query()
checkIfReadOnlyStringVal := allQueryParams.Get("checkifreadonly")
//Remove all spaces
checkIfReadOnlyStringVal = strings.ReplaceAll(checkIfReadOnlyStringVal," ","")
// checkIfReadOnlyStringVal will be empty in case ""checkifreadonly" query param is not sent in the request. checkIfReadOnlyBoolVal will be false
checkIfReadOnlyBoolVal, err := strconv.ParseBool(checkIfReadOnlyStringVal)
var response string
if len(checkIfReadOnlyStringVal) ==0 || nil==err {
if checkIfReadOnlyBoolVal{
admiralState := opts.RemoteRegistry.AdmiralState
if(*admiralState).ReadOnly{
//Force fail health check if Admiral is in Readonly mode
w.WriteHeader(503)
}else {
w.WriteHeader(200)
}
}else {
w.WriteHeader(200)
}
response = fmt.Sprintf("Heath check method called: %v, URI: %v, Method: %v\n", r.Host, r.RequestURI, r.Method)
}else {
w.WriteHeader(400)
response = fmt.Sprintf("Health check method called with bad query param value %v for checkifreadonly",checkIfReadOnlyStringVal)
}
_, writeErr := w.Write([]byte(response))
if writeErr != nil {
log.Printf("Error writing body: %v", writeErr)
Expand Down
47 changes: 47 additions & 0 deletions admiral/pkg/clusters/DRUtil.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package clusters

import (
"context"
"github.com/istio-ecosystem/admiral/admiral/pkg/controller/common"
log "github.com/sirupsen/logrus"
"strings"
)
const ReadWriteEnabled = false
const ReadOnlyEnabled = true;

type AdmiralState struct {
ReadOnly bool
}

type AdmiralStateChecker interface {
runStateCheck(ctx context.Context,as *AdmiralState)
shouldRunOnIndependentGoRoutine() bool
}
/*
Utility function to start Admiral DR checks.
DR checks can be run either on the main go routine or a new go routine
*/
func RunAdmiralStateCheck(ctx context.Context,asc AdmiralStateChecker, as *AdmiralState){
log.Infof("Starting DR checks")
if asc.shouldRunOnIndependentGoRoutine() {
log.Info("Starting Admiral State Checker on a new Go Routine")
go asc.runStateCheck(ctx,as)
}else {
log.Infof("Starting Admiral State Checker on existing Go Routine")
asc.runStateCheck(ctx,as)
}
}

/*
utility function to identify the Admiral DR implementation based on the program parameters
*/
func startAdmiralStateChecker (ctx context.Context,params common.AdmiralParams,as *AdmiralState){
var admiralStateChecker AdmiralStateChecker
switch strings.ToLower(params.AdmiralStateCheckerName) {
case "dynamodbbasedstatechecker":
admiralStateChecker = DynamoDBBasedStateChecker{params.DRStateStoreConfigPath}
default:
admiralStateChecker = NoOPStateChecker{}
}
RunAdmiralStateCheck(ctx,admiralStateChecker,as)
}
109 changes: 109 additions & 0 deletions admiral/pkg/clusters/DynamoDBBasedStateCheck.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package clusters

import (
"context"
log "github.com/sirupsen/logrus"
"time"
)

/*
The skip lease pod can be used for testing DynamoDB based DR.
Update the podname field to "SKIP-LEASE-POD" to test Admiral pods in passive mode.
*/
const SKIP_LEASE_CHECK_POD_NAME = "SKIP-LEASE-POD"

type DynamoDBBasedStateChecker struct {
drConfigFileLocation string
}

func (DynamoDBBasedStateChecker) shouldRunOnIndependentGoRoutine() bool{
return true;
}

/*
This method has the logic to update the ReadOnly field within the AdmiralState object based on the lease obtained on the shared lock object
The AdmiralState object is referenced everywhere in the code before trying to create/update/delete Istio custom objects
Below is the logic for Admiral instance in Active state
1. Get the latest lease information from DynamoDB table
2. If the current pod owns the lease, update the last updated field with current timestamp
3. Update ReadOnly field to false.
4. Sleep for configured duration
5. Admiral instance which is constantly monitoring all the clusters for changes and is responsible to creating , updating and deleting the Istio custom objects
like Service Entry, Destination rule, Virtual Service , Sidecar and others.
Below is the logic for Admiral instance in Passive state
1. Get the latest lease information from DynamoDB table
2. If the current pod does not own the lease, check if the last updated time field is within the configured wait threshold.
3. If the last updated time field is older than the computed threshold, update self as the owner of the lease with current timestamp as last updated time
4. If the last updated time field is within the computed threshold,mark current pod as read only
5. Sleep for configured duration
*/
func (dr DynamoDBBasedStateChecker) runStateCheck(ctx context.Context,as *AdmiralState){
as.ReadOnly = ReadOnlyEnabled
var dynamodbClient *DynamoClient
dynamoDBConfig,err := BuildDynamoDBConfig(dr.drConfigFileLocation)
if nil!= err {
log.Error("DynamoDR: Could not start DynamoDBBasedStateChecker ", err)
panic("Could not start DynamoDBBasedStateChecker")
}
dynamodbClient = NewDynamoClient(dynamoDBConfig)
waitDuration := time.Duration(dynamoDBConfig.WaitTimeInSeconds) * time.Second
ticker := time.NewTicker(waitDuration)
tickChan := ticker.C

for {
select {
case <-ctx.Done():
log.Infoln("DynamoDR: context done stopping ticker")
ticker.Stop()

case <-tickChan:
ExecuteStateCheck(dynamoDBConfig, dynamodbClient, as)
}
}
}

func ExecuteStateCheck(dynamoDBConfig DynamoDBConfig, dynamodbClient *DynamoClient ,as *AdmiralState){
leaseName := dynamoDBConfig.LeaseName
podIdentifier := dynamoDBConfig.PodIdentifier
waitTimeInSeconds :=dynamoDBConfig.WaitTimeInSeconds
failureThreshold := dynamoDBConfig.FailureThreshold
log.Infof("DynamoDR: CurrentPod = %v LeaseName = %v WaitTime= %v sec tableName= %v role= %v region= %v" ,podIdentifier, leaseName, waitTimeInSeconds, dynamoDBConfig.TableName,dynamoDBConfig.Role,dynamoDBConfig.Region)

currentTime := time.Now().UTC().Unix()
log.Infof("DynamoDR: Retrieving latest value of read write value for leaseName : %v , timestamp : %v " , leaseName,currentTime )
readWriteLeases, err := dynamodbClient.getReadWriteLease()
if nil!=err{
log.WithFields(log.Fields{
"error": err.Error(),
}).Error("DynamoDR: Error retrieving the latest lease")
}
readWriteLease := filterOrCreateLeaseIfNotFound(readWriteLeases,leaseName)
if "" == readWriteLease.LeaseOwner {
log.Infof("DynamoDR: Lease with name=%v does not exist. Creating a new lease with owner=%v" , leaseName,podIdentifier)
readWriteLease.LeaseOwner = podIdentifier
readWriteLease.UpdatedTime = currentTime
dynamodbClient.updatedReadWriteLease(readWriteLease,dynamoDBConfig.TableName)
//Not updating read-write mode until we confirm this pod has the lease
}else if SKIP_LEASE_CHECK_POD_NAME == readWriteLease.LeaseOwner {
log.Info("DynamoDR: Lease held by skip lease check pod. Setting Admiral to read only mode")
as.ReadOnly = ReadOnlyEnabled;
}else if podIdentifier == readWriteLease.LeaseOwner {
as.ReadOnly = ReadWriteEnabled
log.Infof("DynamoDR: Lease with name=%v is owned by the current pod. Extending lease ownership till %v. Admiral will write",leaseName, currentTime)
readWriteLease.UpdatedTime = currentTime
dynamodbClient.updatedReadWriteLease(readWriteLease,dynamoDBConfig.TableName)
}else if readWriteLease.UpdatedTime < (currentTime - int64(waitTimeInSeconds*failureThreshold)){
diffSecs := currentTime -readWriteLease.UpdatedTime
log.Infof("DynamoDR: Current time %v is more than the lastUpdated time of lease %v by %v sec. Taking over the lease from %v.",currentTime, readWriteLease.UpdatedTime,diffSecs, readWriteLease.LeaseOwner)
readWriteLease.LeaseOwner = podIdentifier
readWriteLease.UpdatedTime = currentTime
dynamodbClient.updatedReadWriteLease(readWriteLease,dynamoDBConfig.TableName)
//Not updating read-write mode until we confirm this pod has the lease
}else {
log.Infof("DynamoDR: Lease held by %v till %v . Admiral will not write ", readWriteLease.LeaseOwner, readWriteLease.UpdatedTime)
as.ReadOnly = ReadOnlyEnabled;
}

}
Loading

0 comments on commit 47c0dab

Please sign in to comment.