feat(esClientDrain): enhance Drain ES Client function #168
base: master
@@ -17,12 +17,12 @@ import ( | |
v1 "k8s.io/api/core/v1" | ||
) | ||
|
||
// TODO make configurable as flags. | ||
var ( | ||
defaultRetryCount = 999 | ||
defaultRetryWaitTime = 10 * time.Second | ||
defaultRetryMaxWaitTime = 30 * time.Second | ||
) | ||
// RetryConfig holds the retry settings used while waiting for a node to drain. | ||
type RetryConfig struct { | ||
ClientRetryCount int | ||
ClientRetryWaitTime time.Duration | ||
ClientRetryMaxWaitTime time.Duration | ||
} | ||
|
||
// ESClient is a pod drainer which can drain data from Elasticsearch pods. | ||
type ESClient struct { | ||
|
@@ -92,7 +92,7 @@ func (c *ESClient) logger() *log.Entry { | |
} | ||
|
||
// Drain drains data from an Elasticsearch pod. | ||
func (c *ESClient) Drain(ctx context.Context, pod *v1.Pod) error { | ||
func (c *ESClient) Drain(ctx context.Context, pod *v1.Pod, config *RetryConfig) error { | ||
|
||
c.logger().Info("Ensuring cluster is in green state") | ||
|
||
|
@@ -112,11 +112,15 @@ func (c *ESClient) Drain(ctx context.Context, pod *v1.Pod) error { | |
} | ||
|
||
c.logger().Info("Waiting for draining to finish") | ||
return c.waitForEmptyEsNode(ctx, pod) | ||
return c.waitForEmptyEsNode(ctx, pod, config) | ||
} | ||
|
||
func (c *ESClient) Cleanup(ctx context.Context) error { | ||
|
||
// prevent ESClient from executing other operations on the excludeIPList in ES | ||
c.mux.Lock() | ||
defer c.mux.Unlock() | ||
|
||
// 1. fetch IPs from _cat/nodes | ||
nodes, err := c.GetNodes() | ||
if err != nil { | ||
|
@@ -204,13 +208,14 @@ func (c *ESClient) getClusterSettings() (*ESSettings, error) { | |
// adds the podIP to Elasticsearch exclude._ip list | ||
func (c *ESClient) excludePodIP(pod *v1.Pod) error { | ||
|
||
// prevent ESClient from executing other operations on the excludeIPList in ES | ||
c.mux.Lock() | ||
defer c.mux.Unlock() | ||
|
||
podIP := pod.Status.PodIP | ||
|
||
esSettings, err := c.getClusterSettings() | ||
if err != nil { | ||
c.mux.Unlock() | ||
return err | ||
} | ||
|
||
|
@@ -221,6 +226,7 @@ func (c *ESClient) excludePodIP(pod *v1.Pod) error { | |
if excludeString != "" { | ||
ips = strings.Split(excludeString, ",") | ||
} | ||
|
||
var foundPodIP bool | ||
for _, ip := range ips { | ||
if ip == podIP { | ||
|
@@ -234,7 +240,6 @@ func (c *ESClient) excludePodIP(pod *v1.Pod) error { | |
err = c.setExcludeIPs(strings.Join(ips, ",")) | ||
} | ||
|
||
c.mux.Unlock() | ||
return err | ||
} | ||
|
||
|
@@ -257,6 +262,45 @@ func (c *ESClient) setExcludeIPs(ips string) error { | |
return nil | ||
} | ||
|
||
// remove the podIP from Elasticsearch exclude._ip list | ||
func (c *ESClient) undoExcludePodIP(pod *v1.Pod) error { | ||
|
||
// prevent ESClient from executing other operations on the excludeIPList in ES | ||
c.mux.Lock() | ||
defer c.mux.Unlock() | ||
|
||
|
||
podIP := pod.Status.PodIP | ||
|
||
esSettings, err := c.getClusterSettings() | ||
if err != nil { | ||
return err | ||
} | ||
|
||
excludedIPsString := esSettings.Transient.Cluster.Routing.Allocation.Exclude.IP | ||
excludedIPs := strings.Split(excludedIPsString, ",") | ||
|
||
// create a new slice of excluded IPs without the provided pod IP address | ||
var newExcludedIPs []string | ||
for _, excludeIP := range excludedIPs { | ||
if excludeIP != podIP { | ||
newExcludedIPs = append(newExcludedIPs, excludeIP) | ||
} | ||
} | ||
sort.Strings(newExcludedIPs) | ||
|
||
newExcludedIPsString := strings.Join(newExcludedIPs, ",") | ||
if newExcludedIPsString != excludedIPsString { | ||
c.logger().Infof("Setting exclude list to '%s'", newExcludedIPsString) | ||
|
||
err = c.setExcludeIPs(newExcludedIPsString) | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (c *ESClient) updateAutoRebalance(value string) error { | ||
resp, err := resty.New().R(). | ||
SetHeader("Content-Type", "application/json"). | ||
|
@@ -277,23 +321,26 @@ func (c *ESClient) updateAutoRebalance(value string) error { | |
} | ||
|
||
// repeatedly query shard allocations to ensure success of drain operation. | ||
func (c *ESClient) waitForEmptyEsNode(ctx context.Context, pod *v1.Pod) error { | ||
func (c *ESClient) waitForEmptyEsNode(ctx context.Context, pod *v1.Pod, config *RetryConfig) error { | ||
// TODO: implement context handling | ||
podIP := pod.Status.PodIP | ||
_, err := resty.New(). | ||
SetRetryCount(defaultRetryCount). | ||
SetRetryWaitTime(defaultRetryWaitTime). | ||
SetRetryMaxWaitTime(defaultRetryMaxWaitTime). | ||
resp, err := resty.New(). | ||
SetRetryCount(config.ClientRetryCount). | ||
SetRetryWaitTime(config.ClientRetryWaitTime). | ||
SetRetryMaxWaitTime(config.ClientRetryMaxWaitTime). | ||
AddRetryCondition( | ||
// It is expected to return a (bool, error) pair. Resty will retry | ||
// in case the condition returns true or a non-nil error. | ||
func(r *resty.Response) (bool, error) { | ||
if !r.IsSuccess() { | ||
return true, nil | ||
} | ||
|
||
var shards []ESShard | ||
err := json.Unmarshal(r.Body(), &shards) | ||
if err != nil { | ||
return true, err | ||
} | ||
// shardIP := make(map[string]bool) | ||
remainingShards := 0 | ||
for _, shard := range shards { | ||
if shard.IP == podIP { | ||
|
@@ -313,9 +360,32 @@ func (c *ESClient) waitForEmptyEsNode(ctx context.Context, pod *v1.Pod) error { | |
}, | ||
).R(). | ||
Get(c.Endpoint.String() + "/_cat/shards?h=index,ip&format=json") | ||
|
||
if err != nil { | ||
return err | ||
} | ||
|
||
if !resp.IsSuccess() { | ||
return fmt.Errorf("HTTP endpoint responded with not expected status code %d", resp.StatusCode()) | ||
} | ||
|
||
var shards []ESShard | ||
err = json.Unmarshal(resp.Body(), &shards) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
for _, shard := range shards { | ||
if shard.IP == podIP { | ||
err = fmt.Errorf("Cannot migrate shards from pod '%s' with IP '%s' within provided intervals", pod.ObjectMeta.Name, pod.Status.PodIP) | ||
// if we cannot drain the node, return it back to the active nodes pool | ||
if errExclude := c.undoExcludePodIP(pod); errExclude != nil { | ||
While I understand the motivation for this, I'm not sure this is the best place to handle the issue you describe.

Sorry, I don't understand what you suggest. The idea of the current code is to give users the ability to stop waiting infinitely for draining (this can happen mostly because of a configuration error between the EDS and ES indices, or some problems with the pods/cluster). Am I right that you suggest adding an additional setting for this retry? Or do you suggest getting rid of the retry with the Resty lib? So do you suggest just wrapping […]?

@mikkeloscar hey, could you please check my comment?

@mikkeloscar I would be good with this behaviour because it unblocks other cluster operations, but the users should have alerting on "Cannot migrate shards from pod" in the logs, because apparently there's some wrong configuration in the cluster to be fixed manually.

Hey, I'm sorry for not following up here earlier. My main point was that the code added here is, in my mind, operator code and should belong in the logic of […]. Right now, what would happen on a drain timeout is that the drain would be undone by the new code added here, and the next time the operator loop runs it would select the exact same pod again, because it has the drain annotation, and it would again start draining, so you basically don't win anything by having it here is my point. One clear problem that I see with the current behavior (before this PR) is that on scale-out we don't stop the draining of the pod and then undo the drain, which would make sense if we were only draining the pod for scaledown previously. However, if the pod is being drained because of a node being shut down, then draining is exactly what we want, and IMO it shouldn't be stopped. Apart from the scale-out scenario, I don't completely follow the idea of stopping the "infinite" drain in case the cluster is misconfigured. What would be the alternative? To stop the draining and then do what? I agree with @otrosien that the operators/owners of the EDS should have some sort of alert on these kinds of misconfigurations, because it's not something the es-operator is able to handle by itself, so from my perspective it's best if it just tries to drain and a human can get alerted if the drain takes longer than expected (maybe we can create better logs or other metrics to make it easier to create such alerts?).

Hey, sorry for the delayed response. In the PR implementation, it will work as before (I didn't change the existing retry configuration), but we will have a graceful exit (ref 1) from a failed Drain. Also, users will have the ability to configure a custom timeout for Drain, which is useful for cases with regular scale in/down operations (ref 2), like scaling the EDS via cronjobs. So according to your first comment, I think I can leave a fix for 1) and, for 2), add a function […].

Also, we can add an additional parameter to control behavior in the case of […].
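As a side note to the thread above: if the retry behaviour is to stay configurable, the existing "TODO make configurable as flags" comment could be addressed with standard flag wiring. A rough sketch follows; the flag names, defaults, and helper name are assumptions for illustration, not part of this PR, and it assumes the RetryConfig type added in this diff is in scope.

```go
import (
	"flag"
	"time"
)

// parseRetryFlags is a hypothetical example (not part of this PR) of exposing
// the drain retry settings as operator flags. Flag names and defaults are
// illustrative only.
func parseRetryFlags() *RetryConfig {
	retryCount := flag.Int("drain-retry-count", 999, "maximum number of retries while waiting for a node to drain")
	retryWait := flag.Duration("drain-retry-wait-time", 10*time.Second, "initial wait between drain status checks")
	retryMaxWait := flag.Duration("drain-retry-max-wait-time", 30*time.Second, "maximum wait between drain status checks")
	flag.Parse()

	return &RetryConfig{
		ClientRetryCount:       *retryCount,
		ClientRetryWaitTime:    *retryWait,
		ClientRetryMaxWaitTime: *retryMaxWait,
	}
}
```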
||
return fmt.Errorf("during handling request error: '%v' another error has been raised '%v'", err, errExclude) | ||
} | ||
return err | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
|
@@ -457,7 +527,7 @@ func (c *ESClient) CreateIndex(indexName, groupName string, shards, replicas int | |
SetHeader("Content-Type", "application/json"). | ||
SetBody([]byte( | ||
fmt.Sprintf( | ||
`{"settings": {"index" : {"number_of_replicas" : "%d", "number_of_shards": "%d", | ||
`{"settings": {"index" : {"number_of_replicas" : "%d", "number_of_shards": "%d", | ||
"routing.allocation.include.group": "%s"}}}`, | ||
replicas, | ||
shards, | ||
|
nit: let's pass the RetryConfig from here instead of individual arguments.
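For illustration, the call shape this nit suggests could look roughly like the sketch below; the wrapper function, its name, and the error handling are assumptions rather than code from this PR, and the ESClient and RetryConfig types from this diff are assumed to be in scope.

```go
import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// drainPod is a hypothetical caller showing the suggested shape: the whole
// RetryConfig is passed through to Drain instead of separate count/wait
// arguments. Names are illustrative only.
func drainPod(ctx context.Context, c *ESClient, pod *v1.Pod, retryConfig *RetryConfig) error {
	if err := c.Drain(ctx, pod, retryConfig); err != nil {
		return fmt.Errorf("draining pod %s failed: %w", pod.Name, err)
	}
	return nil
}
```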