From 2f7a92f9052da739c5079e50934d6503ffa2cfb9 Mon Sep 17 00:00:00 2001 From: Fabian Kramm Date: Thu, 1 Aug 2024 11:22:13 +0200 Subject: [PATCH 1/2] fix: k8s startup issue --- pkg/k8s/k8s.go | 54 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/pkg/k8s/k8s.go b/pkg/k8s/k8s.go index dd5aea7df..5bea38569 100644 --- a/pkg/k8s/k8s.go +++ b/pkg/k8s/k8s.go @@ -3,8 +3,8 @@ package k8s import ( "context" "crypto/tls" - "errors" "fmt" + "io" "net/http" "os/exec" "strings" @@ -17,6 +17,7 @@ import ( "github.com/loft-sh/vcluster/pkg/pro" "github.com/loft-sh/vcluster/pkg/util/commandwriter" "golang.org/x/sync/errgroup" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/klog/v2" ) @@ -136,9 +137,9 @@ func StartK8S( } // wait for api server to be up as otherwise controller and scheduler might fail - isUp := waitForAPI(ctx) - if !isUp { - return errors.New("waited until timeout for the api to be up, but it never did") + err := waitForAPI(ctx) + if err != nil { + return fmt.Errorf("waited until timeout for the api to be up: %w", err) } // start controller command @@ -216,7 +217,7 @@ func StartK8S( // regular stop case, will return as soon as a component returns an error. // we don't expect the components to stop by themselves since they're supposed // to run until killed or until they fail - err := eg.Wait() + err = eg.Wait() if err == nil || err.Error() == "signal: killed" { return nil } @@ -244,30 +245,47 @@ func RunCommand(ctx context.Context, command []string, component string) error { // waits for the api to be up, ignoring certs and calling it // localhost -func waitForAPI(ctx context.Context) bool { +func waitForAPI(ctx context.Context) error { client := &http.Client{ Timeout: 2 * time.Second, Transport: &http.Transport{ TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, }, } + // sometimes the etcd pod takes a very long time to be ready, // we might want to fine tune how long we wait later - for i := 0; i < 60; i++ { - req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://127.0.0.1:6443/version", nil) + var lastErr error + err := wait.PollUntilContextTimeout(ctx, time.Second*2, time.Minute*5, true, func(ctx context.Context) (done bool, err error) { + // build the request + req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://127.0.0.1:6443/readyz", nil) if err != nil { + lastErr = err klog.Errorf("could not create the request to wait for the api: %s", err.Error()) + return false, nil } - _, err = client.Do(req) - switch { - case errors.Is(err, nil): - return true - case errors.Is(err, context.Canceled): - return false - default: - klog.Info("error while targeting the api on localhost, this is expected during the vcluster creation, will retry after 2 seconds:", err) - time.Sleep(time.Second * 2) + + // do the request + response, err := client.Do(req) + if err != nil { + lastErr = err + klog.Info("error while targeting the api on localhost, this is expected during the vCluster creation, will retry after 2 seconds:", err) + return false, nil } + + // check if we got a ok response status code + if response.StatusCode != http.StatusOK { + bytes, _ := io.ReadAll(response.Body) + klog.FromContext(ctx).Info("api server not ready yet", "reason", string(bytes)) + lastErr = fmt.Errorf("api server not ready yet, reason: %s", string(bytes)) + return false, nil + } + + return true, nil + }) + if err != nil { + return fmt.Errorf("error waiting for API server: %v%w", lastErr, err) } - return false + + return nil } From dbbde514c7fb95a4e0fcf9851a9a3bf4ce4033e9 Mon Sep 17 00:00:00 2001 From: Fabian Kramm Date: Thu, 1 Aug 2024 11:25:29 +0200 Subject: [PATCH 2/2] chore: create backport without merge requirement --- .github/workflows/backport.yaml | 4 ++-- pkg/k8s/k8s.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/backport.yaml b/.github/workflows/backport.yaml index 2fa841097..fe856dca0 100644 --- a/.github/workflows/backport.yaml +++ b/.github/workflows/backport.yaml @@ -2,12 +2,12 @@ name: Automatic backport action on: pull_request_target: - types: ["labeled", "closed"] + types: ["labeled"] jobs: backport: name: Backport PR - if: github.event.pull_request.merged == true && !(contains(github.event.pull_request.labels.*.name, 'backport')) + if: !(contains(github.event.pull_request.labels.*.name, 'backport')) runs-on: ubuntu-latest steps: - name: Backport Action diff --git a/pkg/k8s/k8s.go b/pkg/k8s/k8s.go index 5bea38569..4c2b92170 100644 --- a/pkg/k8s/k8s.go +++ b/pkg/k8s/k8s.go @@ -284,7 +284,7 @@ func waitForAPI(ctx context.Context) error { return true, nil }) if err != nil { - return fmt.Errorf("error waiting for API server: %v%w", lastErr, err) + return fmt.Errorf("error waiting for API server: %w", lastErr) } return nil