Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement non-graceful shutdown #2702

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,14 @@ update-unbound:
sed -i -E '/name:.*unbound_exporter$$/!b;n;s/newTag:.*$$/newTag: $(UNBOUND_EXPORTER_VERSION)/' unbound/base/kustomization.yaml
bin/kustomize build unbound/$(UNBOUND_OVERLAY) > etc/unbound$(UNBOUND_CONFIG_SUFFIX).yaml

.PHONY: update-csi-addons
update-csi-addons:
rm -rf /tmp/work-csi-addons
mkdir -p /tmp/work-csi-addons
curl -o /tmp/work-csi-addons/crds.yaml -sSfL https://github.com/csi-addons/kubernetes-csi-addons/releases/download/v${CSI_ADDONS_VERSION}/crds.yaml
curl -o /tmp/work-csi-addons/rbac.yaml -sSfL https://github.com/csi-addons/kubernetes-csi-addons/releases/download/v${CSI_ADDONS_VERSION}/rbac.yaml
curl -o /tmp/work-csi-addons/setup-controller.yaml -sSfL https://github.com/csi-addons/kubernetes-csi-addons/releases/download/v${CSI_ADDONS_VERSION}/setup-controller.yaml
yq /tmp/work-csi-addons/crds.yaml /tmp/work-csi-addons/rbac.yaml /tmp/work-csi-addons/setup-controller.yaml > etc/csi-addons.yaml
HELM := $(shell pwd)/bin/helm
.PHONY: helm
helm: $(HELM) ## Download helm locally if necessary.
Expand Down
1 change: 1 addition & 0 deletions Makefile.common
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ RUNC_VERSION = 1.1.13
K8S_VERSION = 1.29.7
HELM_VERSION = 3.15.3
STERN_VERSION = 1.30.0
CSI_ADDONS_VERSION = 0.11.0


## These should be updated regularly
Expand Down
2 changes: 2 additions & 0 deletions constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,13 +187,15 @@ var (
filepath.Join(NecoDataDir, "coil.yaml"),
filepath.Join(NecoDataDir, "unbound.yaml"),
filepath.Join(NecoDataDir, "squid.yaml"),
filepath.Join(NecoDataDir, "csi-addons.yaml"),
}
CKEUserResourceFilesPre = []string{
filepath.Join(NecoDataDir, "namespaces.yml"),
filepath.Join(NecoDataDir, "cilium-pre.yaml"),
filepath.Join(NecoDataDir, "coil-pre.yaml"),
filepath.Join(NecoDataDir, "unbound-pre.yaml"),
filepath.Join(NecoDataDir, "squid-pre.yaml"),
filepath.Join(NecoDataDir, "csi-addons.yaml"),
}

NecoCertFile = filepath.Join(NecoDir, "etcd.crt")
Expand Down
2,467 changes: 2,467 additions & 0 deletions etc/csi-addons.yaml

Large diffs are not rendered by default.

15 changes: 9 additions & 6 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ go 1.23.3
replace github.com/armon/go-metrics v0.5.3 => github.com/hashicorp/go-metrics v0.5.3

require (
github.com/csi-addons/kubernetes-csi-addons v0.10.0
github.com/cybozu-go/cke v1.29.0
github.com/cybozu-go/etcdutil v1.6.9
github.com/cybozu-go/log v1.7.0
Expand Down Expand Up @@ -37,10 +38,12 @@ require (
go.etcd.io/etcd/client/v3 v3.5.17
golang.org/x/crypto v0.29.0
golang.org/x/oauth2 v0.24.0
golang.org/x/sync v0.9.0
golang.org/x/term v0.26.0
k8s.io/api v0.31.0
k8s.io/apimachinery v0.31.0
k8s.io/client-go v0.31.0
k8s.io/api v0.31.1
k8s.io/apimachinery v0.31.1
k8s.io/client-go v0.31.1
sigs.k8s.io/controller-runtime v0.19.0
sigs.k8s.io/yaml v1.4.0
)

Expand All @@ -63,6 +66,7 @@ require (
github.com/docker/distribution v2.8.2+incompatible // indirect
github.com/docker/docker-credential-helpers v0.7.0 // indirect
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/evanphx/json-patch/v5 v5.9.0 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
github.com/go-jose/go-jose/v4 v4.0.1 // indirect
Expand Down Expand Up @@ -93,7 +97,7 @@ require (
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/hashicorp/logutils v1.0.0 // indirect
github.com/hashicorp/memberlist v0.5.0 // indirect
github.com/imdario/mergo v0.3.12 // indirect
github.com/imdario/mergo v0.3.13 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
Expand Down Expand Up @@ -132,7 +136,6 @@ require (
go.uber.org/zap v1.27.0 // indirect
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
golang.org/x/net v0.31.0 // indirect
golang.org/x/sync v0.9.0 // indirect
golang.org/x/sys v0.27.0 // indirect
golang.org/x/text v0.20.0 // indirect
golang.org/x/time v0.5.0 // indirect
Expand All @@ -145,7 +148,7 @@ require (
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/component-base v0.30.5 // indirect
k8s.io/component-base v0.31.0 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
k8s.io/kube-proxy v0.30.5 // indirect
Expand Down
34 changes: 23 additions & 11 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSV
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/csi-addons/kubernetes-csi-addons v0.10.0 h1:bBc6nb1oROz4RLhqoLFNeGymk2jIRXcx7LvAup9+3Jg=
github.com/csi-addons/kubernetes-csi-addons v0.10.0/go.mod h1:nqi369YuYMIdysBbHjtYJcWFpcxujPot1HS6tnNWBV4=
github.com/cybozu-go/cke v1.29.0 h1:TC6G8qDKpBBnlZf8j3uPryVA1KgBLjfBU5zyJD0Mbzg=
github.com/cybozu-go/cke v1.29.0/go.mod h1:QowZUhOu2ULS8I6Gfn6D7BT1Hh6DSUfD/p9k7xSLoX0=
github.com/cybozu-go/etcdutil v1.6.9 h1:3ZlKlZYrb8MfWRlx1q94yM4+SaFerRKyOGBccIleB2I=
Expand All @@ -80,6 +82,8 @@ github.com/docker/docker-credential-helpers v0.7.0 h1:xtCHsjxogADNZcdv1pKUHXryef
github.com/docker/docker-credential-helpers v0.7.0/go.mod h1:rETQfLdHNT3foU5kuNkFR1R1V12OJRRO5lzt2D1b5X0=
github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg=
github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ=
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU=
github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
Expand All @@ -101,6 +105,8 @@ github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9
github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ=
github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg=
github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE=
github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs=
github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE=
Expand Down Expand Up @@ -198,8 +204,8 @@ github.com/hashicorp/serf v0.10.1 h1:Z1H2J60yRKvfDYAOZLd2MU0ND4AH/WDz7xYHDWQsIPY
github.com/hashicorp/serf v0.10.1/go.mod h1:yL2t6BqATOLGc5HF7qbFkTfXoPIY0WZdWHfEvMqbG+4=
github.com/hashicorp/vault/api v1.15.0 h1:O24FYQCWwhwKnF7CuSqP30S51rTV7vz1iACXE/pj5DA=
github.com/hashicorp/vault/api v1.15.0/go.mod h1:+5YTO09JGn0u+b6ySD/LLVf8WkJCPLAL2Vkmrn2+CM8=
github.com/imdario/mergo v0.3.12 h1:b6R2BslTbIEToALKP7LxUvijTsNI9TAe80pLWN2g/HU=
github.com/imdario/mergo v0.3.12/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
github.com/imdario/mergo v0.3.13 h1:lFzP57bqS/wsqKssCGmtLAb8A0wKjLGrve2q3PPVcBk=
github.com/imdario/mergo v0.3.13/go.mod h1:4lJ1jqUDcsbIECGy0RUJAXNIhg+6ocWgb1ALK2O4oXg=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
Expand Down Expand Up @@ -501,6 +507,8 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4=
gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
Expand All @@ -510,22 +518,24 @@ gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0=
gotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8=
k8s.io/api v0.31.0 h1:b9LiSjR2ym/SzTOlfMHm1tr7/21aD7fSkqgD/CVJBCo=
k8s.io/api v0.31.0/go.mod h1:0YiFF+JfFxMM6+1hQei8FY8M7s1Mth+z/q7eF1aJkTE=
k8s.io/apimachinery v0.31.0 h1:m9jOiSr3FoSSL5WO9bjm1n6B9KROYYgNZOb4tyZ1lBc=
k8s.io/apimachinery v0.31.0/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo=
k8s.io/client-go v0.31.0 h1:QqEJzNjbN2Yv1H79SsS+SWnXkBgVu4Pj3CJQgbx0gI8=
k8s.io/client-go v0.31.0/go.mod h1:Y9wvC76g4fLjmU0BA+rV+h2cncoadjvjjkkIGoTLcGU=
k8s.io/component-base v0.30.5 h1:O6W8GfdBuyctVy7lu7I0yo8kB6bYgzGzjCyaagb2BR0=
k8s.io/component-base v0.30.5/go.mod h1:eliJtfE7RG18UHMWrqPQWodf1GnQVFGA6McNOHYi11g=
k8s.io/api v0.31.1 h1:Xe1hX/fPW3PXYYv8BlozYqw63ytA92snr96zMW9gWTU=
k8s.io/api v0.31.1/go.mod h1:sbN1g6eY6XVLeqNsZGLnI5FwVseTrZX7Fv3O26rhAaI=
k8s.io/apiextensions-apiserver v0.31.0 h1:fZgCVhGwsclj3qCw1buVXCV6khjRzKC5eCFt24kyLSk=
k8s.io/apiextensions-apiserver v0.31.0/go.mod h1:b9aMDEYaEe5sdK+1T0KU78ApR/5ZVp4i56VacZYEHxk=
k8s.io/apimachinery v0.31.1 h1:mhcUBbj7KUjaVhyXILglcVjuS4nYXiwC+KKFBgIVy7U=
k8s.io/apimachinery v0.31.1/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo=
k8s.io/client-go v0.31.1 h1:f0ugtWSbWpxHR7sjVpQwuvw9a3ZKLXX0u0itkFXufb0=
k8s.io/client-go v0.31.1/go.mod h1:sKI8871MJN2OyeqRlmA4W4KM9KBdBUpDLu/43eGemCg=
k8s.io/component-base v0.31.0 h1:/KIzGM5EvPNQcYgwq5NwoQBaOlVFrghoVGr8lG6vNRs=
k8s.io/component-base v0.31.0/go.mod h1:TYVuzI1QmN4L5ItVdMSXKvH7/DtvIuas5/mm8YT3rTo=
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag=
Expand All @@ -538,6 +548,8 @@ k8s.io/kubelet v0.30.5 h1:7jsJo1MkmGC3VEE0cf1tHiFKzi8DMBVNih3/OttSBaw=
k8s.io/kubelet v0.30.5/go.mod h1:vzllyQKrbFpTq0WOHV6yX0gUs6KqTwCBAO13cAearMc=
k8s.io/utils v0.0.0-20240902221715-702e33fdd3c3 h1:b2FmK8YH+QEwq/Sy2uAEhmqL5nPfGYbJOcaqjeYYZoA=
k8s.io/utils v0.0.0-20240902221715-702e33fdd3c3/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
sigs.k8s.io/controller-runtime v0.19.0 h1:nWVM7aq+Il2ABxwiCizrVDSlmDcshi9llbaFbC0ji/Q=
sigs.k8s.io/controller-runtime v0.19.0/go.mod h1:iRmWllt8IlaLjvTTDLhRBXIEtkCK6hwVBJJsYS9Ajf4=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
Expand Down
59 changes: 59 additions & 0 deletions pkg/neco/cmd/non_graceful_shutdown.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package cmd

import (
"os"
"os/exec"

csiaddonsv1alpha1 "github.com/csi-addons/kubernetes-csi-addons/api/csiaddons/v1alpha1"
"github.com/cybozu-go/neco"
"github.com/spf13/cobra"
"k8s.io/apimachinery/pkg/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/tools/clientcmd"
"sigs.k8s.io/controller-runtime/pkg/client"
)

var (
cephClusters = []string{"ceph-canary-block", "ceph-dotcom-block-0", "ceph-poc", "ceph-ssd"}
nonGracefulNodeShutdownConfig = "/tmp/non-graceful-node-shutdown-config"
)

var nonGracefulNodeShutdownCmd = &cobra.Command{
Use: "nonGracefulNodeShutdown",
Short: "nonGracefulNodeShutdown related commands",
Long: `nonGracefulNodeShutdown related commands.`,
}

func IssueAndLoadKubeconfigForNonGracefulNodeShutdown() (client.Client, error) {
scheme := runtime.NewScheme()
clientgoscheme.AddToScheme(scheme)
csiaddonsv1alpha1.AddToScheme(scheme)

stdout, err := os.Create(nonGracefulNodeShutdownConfig)
if err != nil {
return nil, err
}
defer stdout.Close()
issueCmd := exec.Command(neco.CKECLIBin, "kubernetes", "issue")
issueCmd.Stdout = stdout
err = issueCmd.Run()
if err != nil {
return nil, err
}
stdout.Sync()

config, err := clientcmd.BuildConfigFromFlags("", nonGracefulNodeShutdownConfig)
if err != nil {
return nil, err
}

kubeClient, err := client.New(config, client.Options{Scheme: scheme})
if err != nil {
return nil, err
}
return kubeClient, nil
}

func init() {
rootCmd.AddCommand(nonGracefulNodeShutdownCmd)
}
134 changes: 134 additions & 0 deletions pkg/neco/cmd/non_graceful_shutdown_cleanup.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
package cmd

import (
"context"
"errors"
"fmt"
"os"
"os/signal"
"slices"
"strings"
"syscall"
"time"

csiaddonsv1alpha1 "github.com/csi-addons/kubernetes-csi-addons/api/csiaddons/v1alpha1"
"github.com/cybozu-go/sabakan/v3"
"github.com/spf13/cobra"
"golang.org/x/sync/errgroup"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"sigs.k8s.io/controller-runtime/pkg/client"
)

var nonGracefulShutdownCleanupCmd = &cobra.Command{
Use: "cleanup IP_ADDRESS",
Short: "Cleanup non-graceful shutdowned node",
Long: `Remove NetworkFence and remove taint from the node if it is healthy`,
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
node := args[0]

ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
defer stop()

kubeClient, err := IssueAndLoadKubeconfigForNonGracefulNodeShutdown()
if err != nil {
return err
}

//get sabakan status
opt := sabakanMachinesGetOpts{}
opt.params = map[string]*string{
"ipv4": &node,
}
machines, err := sabakanMachinesGet(ctx, &opt)
if err != nil {
return err
}
sabakanStatus := machines[0].Status.State

// remove networkfence
g := errgroup.Group{}
for _, cephClusterName := range cephClusters {
cephClusterName := cephClusterName
//check cephCluster exists
cephCluster := &unstructured.Unstructured{}
cephCluster.SetAPIVersion("ceph.rook.io/v1")
cephCluster.SetKind("CephCluster")
err := kubeClient.Get(ctx, client.ObjectKey{Name: cephClusterName, Namespace: cephClusterName}, cephCluster)
if err != nil {
if client.IgnoreNotFound(err) == nil {
fmt.Printf("CephCluster %s does not found\n", cephClusterName)
continue
} else {
return err
}
}
g.Go(func() error {
fenceName := cephClusterName + "-" + strings.Replace(node, ".", "-", -1)
networkFence := &csiaddonsv1alpha1.NetworkFence{}
err = kubeClient.Get(ctx, client.ObjectKey{Name: fenceName}, networkFence)
if err != nil {
if client.IgnoreNotFound(err) == nil {
fmt.Printf("NetworkFence %s already removed\n", networkFence.Name)
return nil
} else {
return err
}
}
networkFence.Spec.FenceState = csiaddonsv1alpha1.Unfenced
networkFence.Status = csiaddonsv1alpha1.NetworkFenceStatus{}
err = kubeClient.Update(ctx, networkFence)
if err != nil {
return err
}

// wait for unfense of networkfence to be Succeeded
timeoutCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
defer cancel()
for {
select {
case <-timeoutCtx.Done():
return errors.New("timeout waiting for networkfence to be unfenced")
default:
}
err := kubeClient.Get(ctx, client.ObjectKey{Name: fenceName}, networkFence)
if err != nil {
return err
}
if networkFence.Status.Result == csiaddonsv1alpha1.FencingOperationResultSucceeded {
break
}
time.Sleep(5 * time.Second)
}
err = kubeClient.Delete(ctx, networkFence)
if err != nil {
return err
}
return nil
})
}

if sabakanStatus == sabakan.StateHealthy {
kubernetesNode := &corev1.Node{}
err = kubeClient.Get(ctx, client.ObjectKey{Name: node}, kubernetesNode)
if err != nil {
return err
}
for i, taint := range kubernetesNode.Spec.Taints {
if taint.Key == "node.kubernetes.io/out-of-service" {
kubernetesNode.Spec.Taints = slices.Delete(kubernetesNode.Spec.Taints, i, i+1)
}
}
err = kubeClient.Update(ctx, kubernetesNode)
if err != nil {
return err
}
}
return nil
},
}

func init() {
nonGracefulNodeShutdownCmd.AddCommand(nonGracefulShutdownCleanupCmd)
}
Loading