Skip to content

Commit

Permalink
Merge pull request #713 from cybozu-go/test-too-long-repair
Browse files Browse the repository at this point in the history
Add test for too long repair execution
  • Loading branch information
morimoto-cybozu authored Mar 27, 2024
2 parents 269dc52 + aeb4ae1 commit 9d615b6
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 1 deletion.
1 change: 1 addition & 0 deletions mtest/cke-cluster.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ repair:
- operation: "op1"
repair_steps:
- repair_command: ["sh", "-c", "touch /tmp/mtest-repair-$1", "repair"]
command_timeout_seconds: 30
need_drain: true
watch_seconds: 30
health_check_command: ["sh", "-c", "test -f /tmp/mtest-repair-$1 && echo true", "health_check"]
Expand Down
31 changes: 30 additions & 1 deletion mtest/repair_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,34 @@ func testRepairOperations() {
ckecliSafe("repair-queue", "delete-finished")
waitRepairEmpty(cluster)

By("setting non-returning repair command")
cluster.Repair.RepairProcedures[0].RepairOperations[0].RepairSteps[0].RepairCommand = []string{"sh", "-c", "exec sleep infinity", "sleep-infinity"}
_, err = ckecliClusterSet(cluster)
Expect(err).NotTo(HaveOccurred())
time.Sleep(time.Second * 3)

repairQueueAdd(node1)
waitRepairFailure(cluster)

ckecliSafe("repair-queue", "delete-finished")
waitRepairEmpty(cluster)

By("setting non-returning repair command and long command timeout")
originalCommandTimeoutSeconds := cluster.Repair.RepairProcedures[0].RepairOperations[0].RepairSteps[0].CommandTimeoutSeconds

longCommandTimeout := 90 // must be longer than the timeout of repairShouldNotProceed()
cluster.Repair.RepairProcedures[0].RepairOperations[0].RepairSteps[0].CommandTimeoutSeconds = &longCommandTimeout
_, err = ckecliClusterSet(cluster)
Expect(err).NotTo(HaveOccurred())
time.Sleep(time.Second * 3)

repairQueueAdd(node1)
repairShouldNotProceed()

time.Sleep(time.Second * time.Duration(longCommandTimeout)) // wait for CKE to update the queue entry
ckecliSafe("repair-queue", "delete-finished")
waitRepairEmpty(cluster)

By("setting noop repair command")
cluster.Repair.RepairProcedures[0].RepairOperations[0].RepairSteps[0].RepairCommand = []string{"true"}
_, err = ckecliClusterSet(cluster)
Expand Down Expand Up @@ -155,8 +183,9 @@ func testRepairOperations() {
ckecliSafe("repair-queue", "delete", strconv.Itoa(currentWriteIndex-1))
waitRepairEmpty(cluster)

By("restoring repair command and watch duration")
By("restoring repair command, command timeout, and watch duration")
cluster.Repair.RepairProcedures[0].RepairOperations[0].RepairSteps[0].RepairCommand = originalRepairCommand
cluster.Repair.RepairProcedures[0].RepairOperations[0].RepairSteps[0].CommandTimeoutSeconds = originalCommandTimeoutSeconds
cluster.Repair.RepairProcedures[0].RepairOperations[0].RepairSteps[0].WatchSeconds = originalWatchSeconds
_, err = ckecliClusterSet(cluster)
Expect(err).NotTo(HaveOccurred())
Expand Down

0 comments on commit 9d615b6

Please sign in to comment.