From e6ec7b45153ef0909079260277e9e70369eadf3e Mon Sep 17 00:00:00 2001 From: "Jeremy L. Morris" Date: Mon, 15 Jun 2020 09:18:49 -0400 Subject: [PATCH] Add webhook check for timeouts * Update docs to include Admission Controller Webhook Timeout check with fix * Update file naming to be more consistent for admission controller webhooks * Fix typo in webhook replacement struct name --- checks.md | 68 ++++++ ...dmission_controller_webhook_replacement.go | 12 +- ...on_controller_webhook_replacement_test.go} | 6 +- .../admission_controller_webhook_timeout.go | 84 +++++++ ...mission_controller_webhook_timeout_test.go | 229 ++++++++++++++++++ 5 files changed, 390 insertions(+), 9 deletions(-) rename checks/doks/{admission_controller_webhook_test.go => admission_controller_webhook_replacement_test.go} (98%) create mode 100644 checks/doks/admission_controller_webhook_timeout.go create mode 100644 checks/doks/admission_controller_webhook_timeout_test.go diff --git a/checks.md b/checks.md index b603e9e3..c2889fa8 100644 --- a/checks.md +++ b/checks.md @@ -371,6 +371,74 @@ webhooks: operator: "DoesNotExist" ``` +## Admission Controller Webhook Timeout + +- Name: `admission-controller-webhook-timeout` +- Groups: `doks` + +Admission control webhook timeouts can block upgrades, when the API call times out, due to an incorrectly configured TimeoutSeconds value. Since webhooks inherently add to API latency, we must stay within the recommended range in order for API requests to be successful. Specifically, this happens when an admission control webhook does not respond within 30 seconds. + +### Example + +```yaml +# Error: Configure a webhook with a TimeoutSeconds value greater than 30 seconds. +apiVersion: admissionregistration.k8s.io/v1beta1 +kind: ValidatingWebhookConfiguration +metadata: + name: sample-webhook.example.com +webhooks: +- name: sample-webhook.example.com + rules: + - apiGroups: + - "" + apiVersions: + - v1 + operations: + - CREATE + resources: + - pods + scope: "Namespaced" + clientConfig: + service: + namespace: webhook + name: webhook-server + path: /pods + admissionReviewVersions: + - v1beta1 + timeoutSeconds: 60 +``` + +### How to Fix + +Set the TimeoutSeconds value to anything within the 1 to 30 second range. + +```yaml +apiVersion: admissionregistration.k8s.io/v1beta1 +kind: ValidatingWebhookConfiguration +metadata: + name: sample-webhook.example.com +webhooks: +- name: sample-webhook.example.com + rules: + - apiGroups: + - "" + apiVersions: + - v1 + operations: + - CREATE + resources: + - pods + scope: "Namespaced" + clientConfig: + service: + namespace: webhook + name: webhook-server + path: /pods + admissionReviewVersions: + - v1beta1 + timeoutSeconds: 10 +``` + ## Pod State - Name: `pod-state` diff --git a/checks/doks/admission_controller_webhook_replacement.go b/checks/doks/admission_controller_webhook_replacement.go index 360e9b2a..a15f54e9 100644 --- a/checks/doks/admission_controller_webhook_replacement.go +++ b/checks/doks/admission_controller_webhook_replacement.go @@ -26,29 +26,29 @@ import ( ) func init() { - checks.Register(&webhookReplaementCheck{}) + checks.Register(&webhookReplacementCheck{}) } -type webhookReplaementCheck struct{} +type webhookReplacementCheck struct{} // Name returns a unique name for this check. -func (w *webhookReplaementCheck) Name() string { +func (w *webhookReplacementCheck) Name() string { return "admission-controller-webhook-replacement" } // Groups returns a list of group names this check should be part of. -func (w *webhookReplaementCheck) Groups() []string { +func (w *webhookReplacementCheck) Groups() []string { return []string{"doks"} } // Description returns a detailed human-readable description of what this check // does. -func (w *webhookReplaementCheck) Description() string { +func (w *webhookReplacementCheck) Description() string { return "Check for admission control webhooks that could cause problems during upgrades or node replacement" } // Run runs this check on a set of Kubernetes objects. -func (w *webhookReplaementCheck) Run(objects *kube.Objects) ([]checks.Diagnostic, error) { +func (w *webhookReplacementCheck) Run(objects *kube.Objects) ([]checks.Diagnostic, error) { const apiserverServiceName = "kubernetes" var diagnostics []checks.Diagnostic diff --git a/checks/doks/admission_controller_webhook_test.go b/checks/doks/admission_controller_webhook_replacement_test.go similarity index 98% rename from checks/doks/admission_controller_webhook_test.go rename to checks/doks/admission_controller_webhook_replacement_test.go index 95c463fb..6398371b 100644 --- a/checks/doks/admission_controller_webhook_test.go +++ b/checks/doks/admission_controller_webhook_replacement_test.go @@ -30,14 +30,14 @@ import ( var webhookURL = "https://example.com/webhook" func TestWebhookCheckMeta(t *testing.T) { - webhookCheck := webhookReplaementCheck{} + webhookCheck := webhookReplacementCheck{} assert.Equal(t, "admission-controller-webhook-replacement", webhookCheck.Name()) assert.Equal(t, []string{"doks"}, webhookCheck.Groups()) assert.NotEmpty(t, webhookCheck.Description()) } func TestWebhookCheckRegistration(t *testing.T) { - webhookCheck := &webhookReplaementCheck{} + webhookCheck := &webhookReplacementCheck{} check, err := checks.Get("admission-controller-webhook-replacement") assert.NoError(t, err) assert.Equal(t, check, webhookCheck) @@ -236,7 +236,7 @@ func TestWebhookError(t *testing.T) { }, } - webhookCheck := webhookReplaementCheck{} + webhookCheck := webhookReplacementCheck{} for _, test := range tests { t.Run(test.name, func(t *testing.T) { diff --git a/checks/doks/admission_controller_webhook_timeout.go b/checks/doks/admission_controller_webhook_timeout.go new file mode 100644 index 00000000..24c1bdab --- /dev/null +++ b/checks/doks/admission_controller_webhook_timeout.go @@ -0,0 +1,84 @@ +/* +Copyright 2020 DigitalOcean + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package doks + +import ( + "github.com/digitalocean/clusterlint/checks" + "github.com/digitalocean/clusterlint/kube" +) + +func init() { + checks.Register(&webhookTimeoutCheck{}) +} + +type webhookTimeoutCheck struct{} + +// Name returns a unique name for this check. +func (w *webhookTimeoutCheck) Name() string { + return "admission-controller-webhook-timeout" +} + +// Groups returns a list of group names this check should be part of. +func (w *webhookTimeoutCheck) Groups() []string { + return []string{"doks"} +} + +// Description returns a detailed human-readable description of what this check +// does. +func (w *webhookTimeoutCheck) Description() string { + return "Check for admission control webhooks that have exceeded a timeout of 30 seconds." +} + +// Run runs this check on a set of Kubernetes objects. +func (w *webhookTimeoutCheck) Run(objects *kube.Objects) ([]checks.Diagnostic, error) { + var diagnostics []checks.Diagnostic + + for _, config := range objects.ValidatingWebhookConfigurations.Items { + for _, wh := range config.Webhooks { + if *wh.TimeoutSeconds >= int32(1) && *wh.TimeoutSeconds < int32(30) { + // Webhooks with TimeoutSeconds set: between 1 and 30 is fine. + continue + } + d := checks.Diagnostic{ + Severity: checks.Error, + Message: "Validating webhook with a TimeoutSeconds value greater than 30 seconds will block upgrades.", + Kind: checks.ValidatingWebhookConfiguration, + Object: &config.ObjectMeta, + Owners: config.ObjectMeta.GetOwnerReferences(), + } + diagnostics = append(diagnostics, d) + } + } + + for _, config := range objects.MutatingWebhookConfigurations.Items { + for _, wh := range config.Webhooks { + if *wh.TimeoutSeconds >= int32(1) && *wh.TimeoutSeconds < int32(30) { + // Webhooks with TimeoutSeconds set: between 1 and 30 is fine. + continue + } + d := checks.Diagnostic{ + Severity: checks.Error, + Message: "Mutating webhook with a TimeoutSeconds value greater than 30 seconds will block upgrades.", + Kind: checks.MutatingWebhookConfiguration, + Object: &config.ObjectMeta, + Owners: config.ObjectMeta.GetOwnerReferences(), + } + diagnostics = append(diagnostics, d) + } + } + return diagnostics, nil +} diff --git a/checks/doks/admission_controller_webhook_timeout_test.go b/checks/doks/admission_controller_webhook_timeout_test.go new file mode 100644 index 00000000..eafb854f --- /dev/null +++ b/checks/doks/admission_controller_webhook_timeout_test.go @@ -0,0 +1,229 @@ +/* +Copyright 2020 DigitalOcean + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package doks + +import ( + "testing" + + "github.com/digitalocean/clusterlint/checks" + "github.com/digitalocean/clusterlint/kube" + "github.com/stretchr/testify/assert" + ar "k8s.io/api/admissionregistration/v1beta1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestWebhookTimeoutCheckMeta(t *testing.T) { + webhookCheck := webhookTimeoutCheck{} + assert.Equal(t, "admission-controller-webhook-timeout", webhookCheck.Name()) + assert.Equal(t, []string{"doks"}, webhookCheck.Groups()) + assert.NotEmpty(t, webhookCheck.Description()) +} + +func TestWebhookTimeoutRegistration(t *testing.T) { + webhookCheck := &webhookTimeoutCheck{} + check, err := checks.Get("admission-controller-webhook-timeout") + assert.NoError(t, err) + assert.Equal(t, check, webhookCheck) +} + +func TestWebhookTimeoutError(t *testing.T) { + tests := []struct { + name string + objs *kube.Objects + expected []checks.Diagnostic + }{ + { + name: "no webhook configurations", + objs: &kube.Objects{ + MutatingWebhookConfigurations: &ar.MutatingWebhookConfigurationList{}, + ValidatingWebhookConfigurations: &ar.ValidatingWebhookConfigurationList{}, + }, + expected: nil, + }, + { + name: "TimeoutSeconds value is set to 10 seconds", + objs: webhookTimeoutTestObjects( + ar.WebhookClientConfig{ + Service: &ar.ServiceReference{ + Namespace: "webhook", + Name: "webhook-service", + }, + }, + toIntP(10), + 2, + ), + expected: nil, + }, + { + name: "TimeoutSeconds value is set to 29 seconds", + objs: webhookTimeoutTestObjects( + ar.WebhookClientConfig{ + Service: &ar.ServiceReference{ + Namespace: "webhook", + Name: "webhook-service", + }, + }, + toIntP(29), + 2, + ), + expected: nil, + }, + { + name: "TimeoutSeconds value is set to 30 seconds", + objs: webhookTimeoutTestObjects( + ar.WebhookClientConfig{ + Service: &ar.ServiceReference{ + Namespace: "webhook", + Name: "webhook-service", + }, + }, + toIntP(30), + 2, + ), + expected: webhookTimeoutErrors(), + }, + { + name: "TimeoutSeconds value is set to 31 seconds", + objs: webhookTimeoutTestObjects( + ar.WebhookClientConfig{ + Service: &ar.ServiceReference{ + Namespace: "webhook", + Name: "webhook-service", + }, + }, + toIntP(31), + 2, + ), + expected: webhookTimeoutErrors(), + }, + } + + webhookCheck := webhookTimeoutCheck{} + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + d, err := webhookCheck.Run(test.objs) + assert.NoError(t, err) + assert.ElementsMatch(t, test.expected, d) + }) + } +} + +func webhookTimeoutTestObjects( + clientConfig ar.WebhookClientConfig, + timeoutSeconds *int32, + numNodes int, +) *kube.Objects { + objs := &kube.Objects{ + SystemNamespace: &corev1.Namespace{ + TypeMeta: metav1.TypeMeta{Kind: "Namespace", APIVersion: "v1"}, + ObjectMeta: metav1.ObjectMeta{ + Name: "kube-system", + Labels: map[string]string{"doks_test_key": "bar"}, + }, + }, + Namespaces: &corev1.NamespaceList{ + Items: []corev1.Namespace{ + { + TypeMeta: metav1.TypeMeta{Kind: "Namespace", APIVersion: "v1"}, + ObjectMeta: metav1.ObjectMeta{ + Name: "kube-system", + Labels: map[string]string{"doks_test_key": "bar"}, + }, + }, + { + TypeMeta: metav1.TypeMeta{Kind: "Namespace", APIVersion: "v1"}, + ObjectMeta: metav1.ObjectMeta{ + Name: "webhook", + Labels: map[string]string{"doks_test_key": "xyzzy"}, + }, + }, + }, + }, + MutatingWebhookConfigurations: &ar.MutatingWebhookConfigurationList{ + Items: []ar.MutatingWebhookConfiguration{ + { + TypeMeta: metav1.TypeMeta{Kind: "MutatingWebhookConfiguration", APIVersion: "v1beta1"}, + ObjectMeta: metav1.ObjectMeta{ + Name: "mwc_foo", + }, + Webhooks: []ar.MutatingWebhook{ + { + Name: "mw_foo", + ClientConfig: clientConfig, + TimeoutSeconds: timeoutSeconds, + }, + }, + }, + }, + }, + ValidatingWebhookConfigurations: &ar.ValidatingWebhookConfigurationList{ + Items: []ar.ValidatingWebhookConfiguration{ + { + TypeMeta: metav1.TypeMeta{Kind: "ValidatingWebhookConfiguration", APIVersion: "v1beta1"}, + ObjectMeta: metav1.ObjectMeta{ + Name: "vwc_foo", + }, + Webhooks: []ar.ValidatingWebhook{ + { + Name: "vw_foo", + ClientConfig: clientConfig, + TimeoutSeconds: timeoutSeconds, + }, + }, + }, + }, + }, + } + + objs.Nodes = &corev1.NodeList{} + for i := 0; i < numNodes; i++ { + objs.Nodes.Items = append(objs.Nodes.Items, corev1.Node{}) + } + return objs +} + +func webhookTimeoutErrors() []checks.Diagnostic { + objs := webhookTimeoutTestObjects(ar.WebhookClientConfig{}, nil, 0) + validatingConfig := objs.ValidatingWebhookConfigurations.Items[0] + mutatingConfig := objs.MutatingWebhookConfigurations.Items[0] + + diagnostics := []checks.Diagnostic{ + { + Severity: checks.Error, + Message: "Validating webhook with a TimeoutSeconds value greater than 30 seconds will block upgrades.", + Kind: checks.ValidatingWebhookConfiguration, + Object: &validatingConfig.ObjectMeta, + Owners: validatingConfig.ObjectMeta.GetOwnerReferences(), + }, + { + Severity: checks.Error, + Message: "Mutating webhook with a TimeoutSeconds value greater than 30 seconds will block upgrades.", + Kind: checks.MutatingWebhookConfiguration, + Object: &mutatingConfig.ObjectMeta, + Owners: mutatingConfig.ObjectMeta.GetOwnerReferences(), + }, + } + return diagnostics +} + +// converts an int to an int32 and returns a pointer +func toIntP(i int) *int32 { + num := int32(i) + return &num +}